drivers/iommu/intel/dmar.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2006, Intel Corporation.
4  *
5  * Copyright (C) 2006-2008 Intel Corporation
6  * Author: Ashok Raj <ashok.raj@intel.com>
7  * Author: Shaohua Li <shaohua.li@intel.com>
8  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
9  *
10  * This file implements early detection/parsing of Remapping Devices
11  * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
12  * tables.
13  *
14  * These routines are used by both DMA-remapping and Interrupt-remapping
15  */
16
17 #define pr_fmt(fmt)     "DMAR: " fmt
18
19 #include <linux/pci.h>
20 #include <linux/dmar.h>
21 #include <linux/iova.h>
22 #include <linux/intel-iommu.h>
23 #include <linux/timer.h>
24 #include <linux/irq.h>
25 #include <linux/interrupt.h>
26 #include <linux/tboot.h>
27 #include <linux/dmi.h>
28 #include <linux/slab.h>
29 #include <linux/iommu.h>
30 #include <linux/numa.h>
31 #include <linux/limits.h>
32 #include <asm/irq_remapping.h>
33 #include <asm/iommu_table.h>
34
35 #include "../irq_remapping.h"
36
37 typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *);
38 struct dmar_res_callback {
39         dmar_res_handler_t      cb[ACPI_DMAR_TYPE_RESERVED];
40         void                    *arg[ACPI_DMAR_TYPE_RESERVED];
41         bool                    ignore_unhandled;
42         bool                    print_entry;
43 };
44
45 /*
46  * Assumptions:
47  * 1) The hotplug framework guarantees that a DMAR unit will be hot-added
48  *    before the IO devices managed by that unit.
49  * 2) The hotplug framework guarantees that a DMAR unit will be hot-removed
50  *    after the IO devices managed by that unit.
51  * 3) Hotplug events are rare.
52  *
53  * Locking rules for DMA and interrupt remapping related global data structures:
54  * 1) Use dmar_global_lock in process context
55  * 2) Use RCU in interrupt context
56  */
57 DECLARE_RWSEM(dmar_global_lock);
58 LIST_HEAD(dmar_drhd_units);
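
/*
 * Illustrative sketch only (not part of the code): the typical access
 * patterns implied by the locking rules above.
 *
 *    process context:          down_read(&dmar_global_lock);
 *                              for_each_drhd_unit(drhd) { ... }
 *                              up_read(&dmar_global_lock);
 *
 *    interrupt/atomic context: rcu_read_lock();
 *                              for_each_drhd_unit(drhd) { ... }
 *                              rcu_read_unlock();
 *
 * dmar_find_matched_drhd_unit() below is an example of the RCU pattern.
 */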
59
60 struct acpi_table_header * __initdata dmar_tbl;
61 static int dmar_dev_scope_status = 1;
62 static unsigned long dmar_seq_ids[BITS_TO_LONGS(DMAR_UNITS_SUPPORTED)];
63
64 static int alloc_iommu(struct dmar_drhd_unit *drhd);
65 static void free_iommu(struct intel_iommu *iommu);
66
67 extern const struct iommu_ops intel_iommu_ops;
68
69 static void dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
70 {
71         /*
72          * Add the INCLUDE_ALL unit at the tail, so that a scan of the list
73          * will find it at the very end.
74          */
75         if (drhd->include_all)
76                 list_add_tail_rcu(&drhd->list, &dmar_drhd_units);
77         else
78                 list_add_rcu(&drhd->list, &dmar_drhd_units);
79 }
80
81 void *dmar_alloc_dev_scope(void *start, void *end, int *cnt)
82 {
83         struct acpi_dmar_device_scope *scope;
84
85         *cnt = 0;
86         while (start < end) {
87                 scope = start;
88                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_NAMESPACE ||
89                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
90                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
91                         (*cnt)++;
92                 else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC &&
93                         scope->entry_type != ACPI_DMAR_SCOPE_TYPE_HPET) {
94                         pr_warn("Unsupported device scope\n");
95                 }
96                 start += scope->length;
97         }
98         if (*cnt == 0)
99                 return NULL;
100
101         return kcalloc(*cnt, sizeof(struct dmar_dev_scope), GFP_KERNEL);
102 }
103
104 void dmar_free_dev_scope(struct dmar_dev_scope **devices, int *cnt)
105 {
106         int i;
107         struct device *tmp_dev;
108
109         if (*devices && *cnt) {
110                 for_each_active_dev_scope(*devices, *cnt, i, tmp_dev)
111                         put_device(tmp_dev);
112                 kfree(*devices);
113         }
114
115         *devices = NULL;
116         *cnt = 0;
117 }
118
119 /* Optimize out kzalloc()/kfree() for normal cases */
120 static char dmar_pci_notify_info_buf[64];
121
122 static struct dmar_pci_notify_info *
123 dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event)
124 {
125         int level = 0;
126         size_t size;
127         struct pci_dev *tmp;
128         struct dmar_pci_notify_info *info;
129
130         BUG_ON(dev->is_virtfn);
131
132         /*
133          * Ignore devices that have a domain number higher than what can
134          * be looked up in DMAR, e.g. VMD subdevices with domain 0x10000
135          */
136         if (pci_domain_nr(dev->bus) > U16_MAX)
137                 return NULL;
138
139         /* Only generate path[] for device addition event */
140         if (event == BUS_NOTIFY_ADD_DEVICE)
141                 for (tmp = dev; tmp; tmp = tmp->bus->self)
142                         level++;
143
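        /*
         * struct_size() sizes the allocation for the flexible path[] array:
         * sizeof(*info) + level * sizeof(info->path[0]), with overflow
         * checking folded in.
         */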
144         size = struct_size(info, path, level);
145         if (size <= sizeof(dmar_pci_notify_info_buf)) {
146                 info = (struct dmar_pci_notify_info *)dmar_pci_notify_info_buf;
147         } else {
148                 info = kzalloc(size, GFP_KERNEL);
149                 if (!info) {
150                         pr_warn("Out of memory when allocating notify_info "
151                                 "for %s.\n", pci_name(dev));
152                         if (dmar_dev_scope_status == 0)
153                                 dmar_dev_scope_status = -ENOMEM;
154                         return NULL;
155                 }
156         }
157
158         info->event = event;
159         info->dev = dev;
160         info->seg = pci_domain_nr(dev->bus);
161         info->level = level;
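        /*
         * Walk from the device up to the root bus, filling path[] in reverse
         * so that path[0] describes the hop closest to the root and
         * path[level - 1] describes the device itself.
         */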
162         if (event == BUS_NOTIFY_ADD_DEVICE) {
163                 for (tmp = dev; tmp; tmp = tmp->bus->self) {
164                         level--;
165                         info->path[level].bus = tmp->bus->number;
166                         info->path[level].device = PCI_SLOT(tmp->devfn);
167                         info->path[level].function = PCI_FUNC(tmp->devfn);
168                         if (pci_is_root_bus(tmp->bus))
169                                 info->bus = tmp->bus->number;
170                 }
171         }
172
173         return info;
174 }
175
176 static inline void dmar_free_pci_notify_info(struct dmar_pci_notify_info *info)
177 {
178         if ((void *)info != dmar_pci_notify_info_buf)
179                 kfree(info);
180 }
181
182 static bool dmar_match_pci_path(struct dmar_pci_notify_info *info, int bus,
183                                 struct acpi_dmar_pci_path *path, int count)
184 {
185         int i;
186
187         if (info->bus != bus)
188                 goto fallback;
189         if (info->level != count)
190                 goto fallback;
191
192         for (i = 0; i < count; i++) {
193                 if (path[i].device != info->path[i].device ||
194                     path[i].function != info->path[i].function)
195                         goto fallback;
196         }
197
198         return true;
199
200 fallback:
201
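        /*
         * Some BIOSes describe the device with a single path element relative
         * to its immediate parent bus instead of the full path from the root
         * bus.  Accept that as a firmware workaround when it matches the
         * device's own bus/devfn (see the FW_BUG message below).
         */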
202         if (count != 1)
203                 return false;
204
205         i = info->level - 1;
206         if (bus              == info->path[i].bus &&
207             path[0].device   == info->path[i].device &&
208             path[0].function == info->path[i].function) {
209                 pr_info(FW_BUG "RMRR entry for device %02x:%02x.%x is broken - applying workaround\n",
210                         bus, path[0].device, path[0].function);
211                 return true;
212         }
213
214         return false;
215 }
216
217 /* Return: > 0 if match found, 0 if no match found, < 0 if error happens */
218 int dmar_insert_dev_scope(struct dmar_pci_notify_info *info,
219                           void *start, void *end, u16 segment,
220                           struct dmar_dev_scope *devices,
221                           int devices_cnt)
222 {
223         int i, level;
224         struct device *tmp, *dev = &info->dev->dev;
225         struct acpi_dmar_device_scope *scope;
226         struct acpi_dmar_pci_path *path;
227
228         if (segment != info->seg)
229                 return 0;
230
231         for (; start < end; start += scope->length) {
232                 scope = start;
233                 if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
234                     scope->entry_type != ACPI_DMAR_SCOPE_TYPE_BRIDGE)
235                         continue;
236
237                 path = (struct acpi_dmar_pci_path *)(scope + 1);
238                 level = (scope->length - sizeof(*scope)) / sizeof(*path);
239                 if (!dmar_match_pci_path(info, scope->bus, path, level))
240                         continue;
241
242                 /*
243                  * We expect devices with endpoint scope to have normal PCI
244                  * headers, and devices with bridge scope to have bridge PCI
245                  * headers.  However PCI NTB devices may be listed in the
246                  * DMAR table with bridge scope, even though they have a
247                  * normal PCI header.  NTB devices are identified by class
248                  * "BRIDGE_OTHER" (0680h) - we don't declare a socpe mismatch
249                  * for this special case.
250                  */
251                 if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
252                      info->dev->hdr_type != PCI_HEADER_TYPE_NORMAL) ||
253                     (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE &&
254                      (info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
255                       info->dev->class >> 16 != PCI_BASE_CLASS_BRIDGE))) {
256                         pr_warn("Device scope type does not match for %s\n",
257                                 pci_name(info->dev));
258                         return -EINVAL;
259                 }
260
261                 for_each_dev_scope(devices, devices_cnt, i, tmp)
262                         if (tmp == NULL) {
263                                 devices[i].bus = info->dev->bus->number;
264                                 devices[i].devfn = info->dev->devfn;
265                                 rcu_assign_pointer(devices[i].dev,
266                                                    get_device(dev));
267                                 return 1;
268                         }
269                 BUG_ON(i >= devices_cnt);
270         }
271
272         return 0;
273 }
274
275 int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, u16 segment,
276                           struct dmar_dev_scope *devices, int count)
277 {
278         int index;
279         struct device *tmp;
280
281         if (info->seg != segment)
282                 return 0;
283
284         for_each_active_dev_scope(devices, count, index, tmp)
285                 if (tmp == &info->dev->dev) {
286                         RCU_INIT_POINTER(devices[index].dev, NULL);
287                         synchronize_rcu();
288                         put_device(tmp);
289                         return 1;
290                 }
291
292         return 0;
293 }
294
295 static int dmar_pci_bus_add_dev(struct dmar_pci_notify_info *info)
296 {
297         int ret = 0;
298         struct dmar_drhd_unit *dmaru;
299         struct acpi_dmar_hardware_unit *drhd;
300
301         for_each_drhd_unit(dmaru) {
302                 if (dmaru->include_all)
303                         continue;
304
305                 drhd = container_of(dmaru->hdr,
306                                     struct acpi_dmar_hardware_unit, header);
307                 ret = dmar_insert_dev_scope(info, (void *)(drhd + 1),
308                                 ((void *)drhd) + drhd->header.length,
309                                 dmaru->segment,
310                                 dmaru->devices, dmaru->devices_cnt);
311                 if (ret)
312                         break;
313         }
314         if (ret >= 0)
315                 ret = dmar_iommu_notify_scope_dev(info);
316         if (ret < 0 && dmar_dev_scope_status == 0)
317                 dmar_dev_scope_status = ret;
318
319         if (ret >= 0)
320                 intel_irq_remap_add_device(info);
321
322         return ret;
323 }
324
325 static void dmar_pci_bus_del_dev(struct dmar_pci_notify_info *info)
326 {
327         struct dmar_drhd_unit *dmaru;
328
329         for_each_drhd_unit(dmaru)
330                 if (dmar_remove_dev_scope(info, dmaru->segment,
331                         dmaru->devices, dmaru->devices_cnt))
332                         break;
333         dmar_iommu_notify_scope_dev(info);
334 }
335
336 static int dmar_pci_bus_notifier(struct notifier_block *nb,
337                                  unsigned long action, void *data)
338 {
339         struct pci_dev *pdev = to_pci_dev(data);
340         struct dmar_pci_notify_info *info;
341
342         /* Only care about add/remove events for physical functions.
343          * For VFs we actually do the lookup based on the corresponding
344          * PF in device_to_iommu() anyway. */
345         if (pdev->is_virtfn)
346                 return NOTIFY_DONE;
347         if (action != BUS_NOTIFY_ADD_DEVICE &&
348             action != BUS_NOTIFY_REMOVED_DEVICE)
349                 return NOTIFY_DONE;
350
351         info = dmar_alloc_pci_notify_info(pdev, action);
352         if (!info)
353                 return NOTIFY_DONE;
354
355         down_write(&dmar_global_lock);
356         if (action == BUS_NOTIFY_ADD_DEVICE)
357                 dmar_pci_bus_add_dev(info);
358         else if (action == BUS_NOTIFY_REMOVED_DEVICE)
359                 dmar_pci_bus_del_dev(info);
360         up_write(&dmar_global_lock);
361
362         dmar_free_pci_notify_info(info);
363
364         return NOTIFY_OK;
365 }
366
367 static struct notifier_block dmar_pci_bus_nb = {
368         .notifier_call = dmar_pci_bus_notifier,
369         .priority = INT_MIN,
370 };
371
372 static struct dmar_drhd_unit *
373 dmar_find_dmaru(struct acpi_dmar_hardware_unit *drhd)
374 {
375         struct dmar_drhd_unit *dmaru;
376
377         list_for_each_entry_rcu(dmaru, &dmar_drhd_units, list,
378                                 dmar_rcu_check())
379                 if (dmaru->segment == drhd->segment &&
380                     dmaru->reg_base_addr == drhd->address)
381                         return dmaru;
382
383         return NULL;
384 }
385
386 /*
387  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
388  * structure which uniquely represents one DMA remapping hardware unit
389  * present in the platform
390  */
391 static int dmar_parse_one_drhd(struct acpi_dmar_header *header, void *arg)
392 {
393         struct acpi_dmar_hardware_unit *drhd;
394         struct dmar_drhd_unit *dmaru;
395         int ret;
396
397         drhd = (struct acpi_dmar_hardware_unit *)header;
398         dmaru = dmar_find_dmaru(drhd);
399         if (dmaru)
400                 goto out;
401
402         dmaru = kzalloc(sizeof(*dmaru) + header->length, GFP_KERNEL);
403         if (!dmaru)
404                 return -ENOMEM;
405
406         /*
407          * If header is allocated from slab by ACPI _DSM method, we need to
408          * copy the content because the memory buffer will be freed on return.
409          */
410         dmaru->hdr = (void *)(dmaru + 1);
411         memcpy(dmaru->hdr, header, header->length);
412         dmaru->reg_base_addr = drhd->address;
413         dmaru->segment = drhd->segment;
414         dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
415         dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1),
416                                               ((void *)drhd) + drhd->header.length,
417                                               &dmaru->devices_cnt);
418         if (dmaru->devices_cnt && dmaru->devices == NULL) {
419                 kfree(dmaru);
420                 return -ENOMEM;
421         }
422
423         ret = alloc_iommu(dmaru);
424         if (ret) {
425                 dmar_free_dev_scope(&dmaru->devices,
426                                     &dmaru->devices_cnt);
427                 kfree(dmaru);
428                 return ret;
429         }
430         dmar_register_drhd_unit(dmaru);
431
432 out:
433         if (arg)
434                 (*(int *)arg)++;
435
436         return 0;
437 }
438
439 static void dmar_free_drhd(struct dmar_drhd_unit *dmaru)
440 {
441         if (dmaru->devices && dmaru->devices_cnt)
442                 dmar_free_dev_scope(&dmaru->devices, &dmaru->devices_cnt);
443         if (dmaru->iommu)
444                 free_iommu(dmaru->iommu);
445         kfree(dmaru);
446 }
447
448 static int __init dmar_parse_one_andd(struct acpi_dmar_header *header,
449                                       void *arg)
450 {
451         struct acpi_dmar_andd *andd = (void *)header;
452
453         /* Check for NUL termination within the designated length */
454         if (strnlen(andd->device_name, header->length - 8) == header->length - 8) {
455                 pr_warn(FW_BUG
456                            "Your BIOS is broken; ANDD object name is not NUL-terminated\n"
457                            "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
458                            dmi_get_system_info(DMI_BIOS_VENDOR),
459                            dmi_get_system_info(DMI_BIOS_VERSION),
460                            dmi_get_system_info(DMI_PRODUCT_VERSION));
461                 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
462                 return -EINVAL;
463         }
464         pr_info("ANDD device: %x name: %s\n", andd->device_number,
465                 andd->device_name);
466
467         return 0;
468 }
469
470 #ifdef CONFIG_ACPI_NUMA
471 static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg)
472 {
473         struct acpi_dmar_rhsa *rhsa;
474         struct dmar_drhd_unit *drhd;
475
476         rhsa = (struct acpi_dmar_rhsa *)header;
477         for_each_drhd_unit(drhd) {
478                 if (drhd->reg_base_addr == rhsa->base_address) {
479                         int node = pxm_to_node(rhsa->proximity_domain);
480
481                         if (!node_online(node))
482                                 node = NUMA_NO_NODE;
483                         drhd->iommu->node = node;
484                         return 0;
485                 }
486         }
487         pr_warn(FW_BUG
488                 "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
489                 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
490                 rhsa->base_address,
491                 dmi_get_system_info(DMI_BIOS_VENDOR),
492                 dmi_get_system_info(DMI_BIOS_VERSION),
493                 dmi_get_system_info(DMI_PRODUCT_VERSION));
494         add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
495
496         return 0;
497 }
498 #else
499 #define dmar_parse_one_rhsa             dmar_res_noop
500 #endif
501
502 static void
503 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
504 {
505         struct acpi_dmar_hardware_unit *drhd;
506         struct acpi_dmar_reserved_memory *rmrr;
507         struct acpi_dmar_atsr *atsr;
508         struct acpi_dmar_rhsa *rhsa;
509
510         switch (header->type) {
511         case ACPI_DMAR_TYPE_HARDWARE_UNIT:
512                 drhd = container_of(header, struct acpi_dmar_hardware_unit,
513                                     header);
514                 pr_info("DRHD base: %#016Lx flags: %#x\n",
515                         (unsigned long long)drhd->address, drhd->flags);
516                 break;
517         case ACPI_DMAR_TYPE_RESERVED_MEMORY:
518                 rmrr = container_of(header, struct acpi_dmar_reserved_memory,
519                                     header);
520                 pr_info("RMRR base: %#016Lx end: %#016Lx\n",
521                         (unsigned long long)rmrr->base_address,
522                         (unsigned long long)rmrr->end_address);
523                 break;
524         case ACPI_DMAR_TYPE_ROOT_ATS:
525                 atsr = container_of(header, struct acpi_dmar_atsr, header);
526                 pr_info("ATSR flags: %#x\n", atsr->flags);
527                 break;
528         case ACPI_DMAR_TYPE_HARDWARE_AFFINITY:
529                 rhsa = container_of(header, struct acpi_dmar_rhsa, header);
530                 pr_info("RHSA base: %#016Lx proximity domain: %#x\n",
531                        (unsigned long long)rhsa->base_address,
532                        rhsa->proximity_domain);
533                 break;
534         case ACPI_DMAR_TYPE_NAMESPACE:
535                 /* We don't print this here because we need to sanity-check
536                    it first. So print it in dmar_parse_one_andd() instead. */
537                 break;
538         }
539 }
540
541 /**
542  * dmar_table_detect - checks to see if the platform supports DMAR devices
543  */
544 static int __init dmar_table_detect(void)
545 {
546         acpi_status status = AE_OK;
547
548         /* if we can find the DMAR table, then there are DMAR devices */
549         status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl);
550
551         if (ACPI_SUCCESS(status) && !dmar_tbl) {
552                 pr_warn("Unable to map DMAR\n");
553                 status = AE_NOT_FOUND;
554         }
555
556         return ACPI_SUCCESS(status) ? 0 : -ENOENT;
557 }
558
559 static int dmar_walk_remapping_entries(struct acpi_dmar_header *start,
560                                        size_t len, struct dmar_res_callback *cb)
561 {
562         struct acpi_dmar_header *iter, *next;
563         struct acpi_dmar_header *end = ((void *)start) + len;
564
565         for (iter = start; iter < end; iter = next) {
566                 next = (void *)iter + iter->length;
567                 if (iter->length == 0) {
568                         /* Avoid looping forever on bad ACPI tables */
569                         pr_debug(FW_BUG "Invalid 0-length structure\n");
570                         break;
571                 } else if (next > end) {
572                         /* Avoid running past the table end */
573                         pr_warn(FW_BUG "Record passes table end\n");
574                         return -EINVAL;
575                 }
576
577                 if (cb->print_entry)
578                         dmar_table_print_dmar_entry(iter);
579
580                 if (iter->type >= ACPI_DMAR_TYPE_RESERVED) {
581                         /* continue for forward compatibility */
582                         pr_debug("Unknown DMAR structure type %d\n",
583                                  iter->type);
584                 } else if (cb->cb[iter->type]) {
585                         int ret;
586
587                         ret = cb->cb[iter->type](iter, cb->arg[iter->type]);
588                         if (ret)
589                                 return ret;
590                 } else if (!cb->ignore_unhandled) {
591                         pr_warn("No handler for DMAR structure type %d\n",
592                                 iter->type);
593                         return -EINVAL;
594                 }
595         }
596
597         return 0;
598 }
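
/*
 * For reference, the DMAR table layout walked above (ACPI DMAR definition):
 *
 *    struct acpi_table_dmar     table header (host address width, flags)
 *    struct acpi_dmar_header    sub-table: DRHD/RMRR/ATSR/RHSA/ANDD
 *    ...                        each sub-table starts with { type, length }
 *
 * dmar_walk_remapping_entries() advances by 'length' and dispatches on 'type'
 * through the dmar_res_callback arrays.
 */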
599
600 static inline int dmar_walk_dmar_table(struct acpi_table_dmar *dmar,
601                                        struct dmar_res_callback *cb)
602 {
603         return dmar_walk_remapping_entries((void *)(dmar + 1),
604                         dmar->header.length - sizeof(*dmar), cb);
605 }
606
607 /**
608  * parse_dmar_table - parses the DMA remapping reporting (DMAR) table
609  */
610 static int __init
611 parse_dmar_table(void)
612 {
613         struct acpi_table_dmar *dmar;
614         int drhd_count = 0;
615         int ret;
616         struct dmar_res_callback cb = {
617                 .print_entry = true,
618                 .ignore_unhandled = true,
619                 .arg[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &drhd_count,
620                 .cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_parse_one_drhd,
621                 .cb[ACPI_DMAR_TYPE_RESERVED_MEMORY] = &dmar_parse_one_rmrr,
622                 .cb[ACPI_DMAR_TYPE_ROOT_ATS] = &dmar_parse_one_atsr,
623                 .cb[ACPI_DMAR_TYPE_HARDWARE_AFFINITY] = &dmar_parse_one_rhsa,
624                 .cb[ACPI_DMAR_TYPE_NAMESPACE] = &dmar_parse_one_andd,
625         };
626
627         /*
628          * Do it again; the earlier dmar_tbl mapping could have been done
629          * with a fixed map.
630          */
631         dmar_table_detect();
632
633         /*
634          * ACPI tables may not be DMA protected by tboot, so use the DMAR copy
635          * saved by SINIT in SinitMleData in the TXT heap (which is DMA protected)
636          */
637         dmar_tbl = tboot_get_dmar_table(dmar_tbl);
638
639         dmar = (struct acpi_table_dmar *)dmar_tbl;
640         if (!dmar)
641                 return -ENODEV;
642
643         if (dmar->width < PAGE_SHIFT - 1) {
644                 pr_warn("Invalid DMAR haw\n");
645                 return -EINVAL;
646         }
647
648         pr_info("Host address width %d\n", dmar->width + 1);
649         ret = dmar_walk_dmar_table(dmar, &cb);
650         if (ret == 0 && drhd_count == 0)
651                 pr_warn(FW_BUG "No DRHD structure found in DMAR table\n");
652
653         return ret;
654 }
655
656 static int dmar_pci_device_match(struct dmar_dev_scope devices[],
657                                  int cnt, struct pci_dev *dev)
658 {
659         int index;
660         struct device *tmp;
661
662         while (dev) {
663                 for_each_active_dev_scope(devices, cnt, index, tmp)
664                         if (dev_is_pci(tmp) && dev == to_pci_dev(tmp))
665                                 return 1;
666
667                 /* Check our parent */
668                 dev = dev->bus->self;
669         }
670
671         return 0;
672 }
673
674 struct dmar_drhd_unit *
675 dmar_find_matched_drhd_unit(struct pci_dev *dev)
676 {
677         struct dmar_drhd_unit *dmaru;
678         struct acpi_dmar_hardware_unit *drhd;
679
680         dev = pci_physfn(dev);
681
682         rcu_read_lock();
683         for_each_drhd_unit(dmaru) {
684                 drhd = container_of(dmaru->hdr,
685                                     struct acpi_dmar_hardware_unit,
686                                     header);
687
688                 if (dmaru->include_all &&
689                     drhd->segment == pci_domain_nr(dev->bus))
690                         goto out;
691
692                 if (dmar_pci_device_match(dmaru->devices,
693                                           dmaru->devices_cnt, dev))
694                         goto out;
695         }
696         dmaru = NULL;
697 out:
698         rcu_read_unlock();
699
700         return dmaru;
701 }
702
703 static void __init dmar_acpi_insert_dev_scope(u8 device_number,
704                                               struct acpi_device *adev)
705 {
706         struct dmar_drhd_unit *dmaru;
707         struct acpi_dmar_hardware_unit *drhd;
708         struct acpi_dmar_device_scope *scope;
709         struct device *tmp;
710         int i;
711         struct acpi_dmar_pci_path *path;
712
713         for_each_drhd_unit(dmaru) {
714                 drhd = container_of(dmaru->hdr,
715                                     struct acpi_dmar_hardware_unit,
716                                     header);
717
718                 for (scope = (void *)(drhd + 1);
719                      (unsigned long)scope < ((unsigned long)drhd) + drhd->header.length;
720                      scope = ((void *)scope) + scope->length) {
721                         if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_NAMESPACE)
722                                 continue;
723                         if (scope->enumeration_id != device_number)
724                                 continue;
725
726                         path = (void *)(scope + 1);
727                         pr_info("ACPI device \"%s\" under DMAR at %llx as %02x:%02x.%d\n",
728                                 dev_name(&adev->dev), dmaru->reg_base_addr,
729                                 scope->bus, path->device, path->function);
730                         for_each_dev_scope(dmaru->devices, dmaru->devices_cnt, i, tmp)
731                                 if (tmp == NULL) {
732                                         dmaru->devices[i].bus = scope->bus;
733                                         dmaru->devices[i].devfn = PCI_DEVFN(path->device,
734                                                                             path->function);
735                                         rcu_assign_pointer(dmaru->devices[i].dev,
736                                                            get_device(&adev->dev));
737                                         return;
738                                 }
739                         BUG_ON(i >= dmaru->devices_cnt);
740                 }
741         }
742         pr_warn("No IOMMU scope found for ANDD enumeration ID %d (%s)\n",
743                 device_number, dev_name(&adev->dev));
744 }
745
746 static int __init dmar_acpi_dev_scope_init(void)
747 {
748         struct acpi_dmar_andd *andd;
749
750         if (dmar_tbl == NULL)
751                 return -ENODEV;
752
753         for (andd = (void *)dmar_tbl + sizeof(struct acpi_table_dmar);
754              ((unsigned long)andd) < ((unsigned long)dmar_tbl) + dmar_tbl->length;
755              andd = ((void *)andd) + andd->header.length) {
756                 if (andd->header.type == ACPI_DMAR_TYPE_NAMESPACE) {
757                         acpi_handle h;
758                         struct acpi_device *adev;
759
760                         if (!ACPI_SUCCESS(acpi_get_handle(ACPI_ROOT_OBJECT,
761                                                           andd->device_name,
762                                                           &h))) {
763                                 pr_err("Failed to find handle for ACPI object %s\n",
764                                        andd->device_name);
765                                 continue;
766                         }
767                         if (acpi_bus_get_device(h, &adev)) {
768                                 pr_err("Failed to get device for ACPI object %s\n",
769                                        andd->device_name);
770                                 continue;
771                         }
772                         dmar_acpi_insert_dev_scope(andd->device_number, adev);
773                 }
774         }
775         return 0;
776 }
777
778 int __init dmar_dev_scope_init(void)
779 {
780         struct pci_dev *dev = NULL;
781         struct dmar_pci_notify_info *info;
782
783         if (dmar_dev_scope_status != 1)
784                 return dmar_dev_scope_status;
785
786         if (list_empty(&dmar_drhd_units)) {
787                 dmar_dev_scope_status = -ENODEV;
788         } else {
789                 dmar_dev_scope_status = 0;
790
791                 dmar_acpi_dev_scope_init();
792
793                 for_each_pci_dev(dev) {
794                         if (dev->is_virtfn)
795                                 continue;
796
797                         info = dmar_alloc_pci_notify_info(dev,
798                                         BUS_NOTIFY_ADD_DEVICE);
799                         if (!info) {
800                                 return dmar_dev_scope_status;
801                         } else {
802                                 dmar_pci_bus_add_dev(info);
803                                 dmar_free_pci_notify_info(info);
804                         }
805                 }
806         }
807
808         return dmar_dev_scope_status;
809 }
810
811 void __init dmar_register_bus_notifier(void)
812 {
813         bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb);
814 }
815
816
817 int __init dmar_table_init(void)
818 {
819         static int dmar_table_initialized;
820         int ret;
821
822         if (dmar_table_initialized == 0) {
823                 ret = parse_dmar_table();
824                 if (ret < 0) {
825                         if (ret != -ENODEV)
826                                 pr_info("Parse DMAR table failure.\n");
827                 } else  if (list_empty(&dmar_drhd_units)) {
828                         pr_info("No DMAR devices found\n");
829                         ret = -ENODEV;
830                 }
831
832                 if (ret < 0)
833                         dmar_table_initialized = ret;
834                 else
835                         dmar_table_initialized = 1;
836         }
837
838         return dmar_table_initialized < 0 ? dmar_table_initialized : 0;
839 }
840
841 static void warn_invalid_dmar(u64 addr, const char *message)
842 {
843         pr_warn_once(FW_BUG
844                 "Your BIOS is broken; DMAR reported at address %llx%s!\n"
845                 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
846                 addr, message,
847                 dmi_get_system_info(DMI_BIOS_VENDOR),
848                 dmi_get_system_info(DMI_BIOS_VERSION),
849                 dmi_get_system_info(DMI_PRODUCT_VERSION));
850         add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
851 }
852
853 static int __ref
854 dmar_validate_one_drhd(struct acpi_dmar_header *entry, void *arg)
855 {
856         struct acpi_dmar_hardware_unit *drhd;
857         void __iomem *addr;
858         u64 cap, ecap;
859
860         drhd = (void *)entry;
861         if (!drhd->address) {
862                 warn_invalid_dmar(0, "");
863                 return -EINVAL;
864         }
865
866         if (arg)
867                 addr = ioremap(drhd->address, VTD_PAGE_SIZE);
868         else
869                 addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
870         if (!addr) {
871                 pr_warn("Can't validate DRHD address: %llx\n", drhd->address);
872                 return -EINVAL;
873         }
874
875         cap = dmar_readq(addr + DMAR_CAP_REG);
876         ecap = dmar_readq(addr + DMAR_ECAP_REG);
877
878         if (arg)
879                 iounmap(addr);
880         else
881                 early_iounmap(addr, VTD_PAGE_SIZE);
882
883         if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
884                 warn_invalid_dmar(drhd->address, " returns all ones");
885                 return -EINVAL;
886         }
887
888         return 0;
889 }
890
891 int __init detect_intel_iommu(void)
892 {
893         int ret;
894         struct dmar_res_callback validate_drhd_cb = {
895                 .cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_validate_one_drhd,
896                 .ignore_unhandled = true,
897         };
898
899         down_write(&dmar_global_lock);
900         ret = dmar_table_detect();
901         if (!ret)
902                 ret = dmar_walk_dmar_table((struct acpi_table_dmar *)dmar_tbl,
903                                            &validate_drhd_cb);
904         if (!ret && !no_iommu && !iommu_detected &&
905             (!dmar_disabled || dmar_platform_optin())) {
906                 iommu_detected = 1;
907                 /* Make sure ACS will be enabled */
908                 pci_request_acs();
909         }
910
911 #ifdef CONFIG_X86
912         if (!ret) {
913                 x86_init.iommu.iommu_init = intel_iommu_init;
914                 x86_platform.iommu_shutdown = intel_iommu_shutdown;
915         }
916
917 #endif
918
919         if (dmar_tbl) {
920                 acpi_put_table(dmar_tbl);
921                 dmar_tbl = NULL;
922         }
923         up_write(&dmar_global_lock);
924
925         return ret ? ret : 1;
926 }
927
928 static void unmap_iommu(struct intel_iommu *iommu)
929 {
930         iounmap(iommu->reg);
931         release_mem_region(iommu->reg_phys, iommu->reg_size);
932 }
933
934 /**
935  * map_iommu: map the iommu's registers
936  * @iommu: the iommu to map
937  * @phys_addr: the physical address of the base register
938  *
939  * Memory map the iommu's registers.  Start w/ a single page, and
940  * possibly expand if that turns out to be insufficient.
941  */
942 static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
943 {
944         int map_size, err = 0;
945
946         iommu->reg_phys = phys_addr;
947         iommu->reg_size = VTD_PAGE_SIZE;
948
949         if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) {
950                 pr_err("Can't reserve memory\n");
951                 err = -EBUSY;
952                 goto out;
953         }
954
955         iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
956         if (!iommu->reg) {
957                 pr_err("Can't map the region\n");
958                 err = -ENOMEM;
959                 goto release;
960         }
961
962         iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
963         iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
964
965         if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
966                 err = -EINVAL;
967                 warn_invalid_dmar(phys_addr, " returns all ones");
968                 goto unmap;
969         }
970         iommu->vccap = dmar_readq(iommu->reg + DMAR_VCCAP_REG);
971
972         /* the registers might be more than one page */
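        /*
         * ecap_max_iotlb_offset() and cap_max_fault_reg_offset() report how
         * far the IOTLB invalidation and fault recording registers extend;
         * if that exceeds the initial VTD_PAGE_SIZE mapping, remap below.
         */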
973         map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
974                          cap_max_fault_reg_offset(iommu->cap));
975         map_size = VTD_PAGE_ALIGN(map_size);
976         if (map_size > iommu->reg_size) {
977                 iounmap(iommu->reg);
978                 release_mem_region(iommu->reg_phys, iommu->reg_size);
979                 iommu->reg_size = map_size;
980                 if (!request_mem_region(iommu->reg_phys, iommu->reg_size,
981                                         iommu->name)) {
982                         pr_err("Can't reserve memory\n");
983                         err = -EBUSY;
984                         goto out;
985                 }
986                 iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
987                 if (!iommu->reg) {
988                         pr_err("Can't map the region\n");
989                         err = -ENOMEM;
990                         goto release;
991                 }
992         }
993         err = 0;
994         goto out;
995
996 unmap:
997         iounmap(iommu->reg);
998 release:
999         release_mem_region(iommu->reg_phys, iommu->reg_size);
1000 out:
1001         return err;
1002 }
1003
1004 static int dmar_alloc_seq_id(struct intel_iommu *iommu)
1005 {
1006         iommu->seq_id = find_first_zero_bit(dmar_seq_ids,
1007                                             DMAR_UNITS_SUPPORTED);
1008         if (iommu->seq_id >= DMAR_UNITS_SUPPORTED) {
1009                 iommu->seq_id = -1;
1010         } else {
1011                 set_bit(iommu->seq_id, dmar_seq_ids);
1012                 sprintf(iommu->name, "dmar%d", iommu->seq_id);
1013         }
1014
1015         return iommu->seq_id;
1016 }
1017
1018 static void dmar_free_seq_id(struct intel_iommu *iommu)
1019 {
1020         if (iommu->seq_id >= 0) {
1021                 clear_bit(iommu->seq_id, dmar_seq_ids);
1022                 iommu->seq_id = -1;
1023         }
1024 }
1025
1026 static int alloc_iommu(struct dmar_drhd_unit *drhd)
1027 {
1028         struct intel_iommu *iommu;
1029         u32 ver, sts;
1030         int agaw = -1;
1031         int msagaw = -1;
1032         int err;
1033
1034         if (!drhd->reg_base_addr) {
1035                 warn_invalid_dmar(0, "");
1036                 return -EINVAL;
1037         }
1038
1039         iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
1040         if (!iommu)
1041                 return -ENOMEM;
1042
1043         if (dmar_alloc_seq_id(iommu) < 0) {
1044                 pr_err("Failed to allocate seq_id\n");
1045                 err = -ENOSPC;
1046                 goto error;
1047         }
1048
1049         err = map_iommu(iommu, drhd->reg_base_addr);
1050         if (err) {
1051                 pr_err("Failed to map %s\n", iommu->name);
1052                 goto error_free_seq_id;
1053         }
1054
1055         err = -EINVAL;
1056         if (cap_sagaw(iommu->cap) == 0) {
1057                 pr_info("%s: No supported address widths. Not attempting DMA translation.\n",
1058                         iommu->name);
1059                 drhd->ignored = 1;
1060         }
1061
1062         if (!drhd->ignored) {
1063                 agaw = iommu_calculate_agaw(iommu);
1064                 if (agaw < 0) {
1065                         pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
1066                                iommu->seq_id);
1067                         drhd->ignored = 1;
1068                 }
1069         }
1070         if (!drhd->ignored) {
1071                 msagaw = iommu_calculate_max_sagaw(iommu);
1072                 if (msagaw < 0) {
1073                         pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n",
1074                                iommu->seq_id);
1075                         drhd->ignored = 1;
1076                         agaw = -1;
1077                 }
1078         }
1079         iommu->agaw = agaw;
1080         iommu->msagaw = msagaw;
1081         iommu->segment = drhd->segment;
1082
1083         iommu->node = NUMA_NO_NODE;
1084
1085         ver = readl(iommu->reg + DMAR_VER_REG);
1086         pr_info("%s: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
1087                 iommu->name,
1088                 (unsigned long long)drhd->reg_base_addr,
1089                 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
1090                 (unsigned long long)iommu->cap,
1091                 (unsigned long long)iommu->ecap);
1092
1093         /* Reflect status in gcmd */
1094         sts = readl(iommu->reg + DMAR_GSTS_REG);
1095         if (sts & DMA_GSTS_IRES)
1096                 iommu->gcmd |= DMA_GCMD_IRE;
1097         if (sts & DMA_GSTS_TES)
1098                 iommu->gcmd |= DMA_GCMD_TE;
1099         if (sts & DMA_GSTS_QIES)
1100                 iommu->gcmd |= DMA_GCMD_QIE;
1101
1102         raw_spin_lock_init(&iommu->register_lock);
1103
1104         /*
1105          * This is only for hotplug; at boot time intel_iommu_enabled won't
1106          * be set yet. When intel_iommu_init() runs, it registers the units
1107          * present at boot time, then sets intel_iommu_enabled.
1108          */
1109         if (intel_iommu_enabled && !drhd->ignored) {
1110                 err = iommu_device_sysfs_add(&iommu->iommu, NULL,
1111                                              intel_iommu_groups,
1112                                              "%s", iommu->name);
1113                 if (err)
1114                         goto err_unmap;
1115
1116                 iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
1117
1118                 err = iommu_device_register(&iommu->iommu);
1119                 if (err)
1120                         goto err_unmap;
1121         }
1122
1123         drhd->iommu = iommu;
1124         iommu->drhd = drhd;
1125
1126         return 0;
1127
1128 err_unmap:
1129         unmap_iommu(iommu);
1130 error_free_seq_id:
1131         dmar_free_seq_id(iommu);
1132 error:
1133         kfree(iommu);
1134         return err;
1135 }
1136
1137 static void free_iommu(struct intel_iommu *iommu)
1138 {
1139         if (intel_iommu_enabled && iommu->iommu.ops) {
1140                 iommu_device_unregister(&iommu->iommu);
1141                 iommu_device_sysfs_remove(&iommu->iommu);
1142         }
1143
1144         if (iommu->irq) {
1145                 if (iommu->pr_irq) {
1146                         free_irq(iommu->pr_irq, iommu);
1147                         dmar_free_hwirq(iommu->pr_irq);
1148                         iommu->pr_irq = 0;
1149                 }
1150                 free_irq(iommu->irq, iommu);
1151                 dmar_free_hwirq(iommu->irq);
1152                 iommu->irq = 0;
1153         }
1154
1155         if (iommu->qi) {
1156                 free_page((unsigned long)iommu->qi->desc);
1157                 kfree(iommu->qi->desc_status);
1158                 kfree(iommu->qi);
1159         }
1160
1161         if (iommu->reg)
1162                 unmap_iommu(iommu);
1163
1164         dmar_free_seq_id(iommu);
1165         kfree(iommu);
1166 }
1167
1168 /*
1169  * Reclaim all the submitted descriptors which have completed their work.
1170  */
1171 static inline void reclaim_free_desc(struct q_inval *qi)
1172 {
1173         while (qi->desc_status[qi->free_tail] == QI_DONE ||
1174                qi->desc_status[qi->free_tail] == QI_ABORT) {
1175                 qi->desc_status[qi->free_tail] = QI_FREE;
1176                 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
1177                 qi->free_cnt++;
1178         }
1179 }
1180
1181 static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
1182 {
1183         u32 fault;
1184         int head, tail;
1185         struct q_inval *qi = iommu->qi;
1186         int shift = qi_shift(iommu);
1187
1188         if (qi->desc_status[wait_index] == QI_ABORT)
1189                 return -EAGAIN;
1190
1191         fault = readl(iommu->reg + DMAR_FSTS_REG);
1192
1193         /*
1194          * If IQE happens, the head points to the descriptor associated
1195          * with the error. No new descriptors are fetched until the IQE
1196          * is cleared.
1197          */
1198         if (fault & DMA_FSTS_IQE) {
1199                 head = readl(iommu->reg + DMAR_IQH_REG);
1200                 if ((head >> shift) == index) {
1201                         struct qi_desc *desc = qi->desc + head;
1202
1203                         /*
1204                          * desc->qw2 and desc->qw3 are either reserved or
1205                          * used by software as private data. We won't print
1206                          * out these two qw's for security considerations.
1207                          */
1208                         pr_err("VT-d detected invalid descriptor: qw0 = %llx, qw1 = %llx\n",
1209                                (unsigned long long)desc->qw0,
1210                                (unsigned long long)desc->qw1);
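                        /*
                         * Overwrite the offending descriptor with the (benign)
                         * wait descriptor so the queue can make forward
                         * progress once the IQE bit is cleared below.
                         */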
1211                         memcpy(desc, qi->desc + (wait_index << shift),
1212                                1 << shift);
1213                         writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
1214                         return -EINVAL;
1215                 }
1216         }
1217
1218         /*
1219          * If ITE happens, all pending wait_desc commands are aborted.
1220          * No new descriptors are fetched until the ITE is cleared.
1221          */
1222         if (fault & DMA_FSTS_ITE) {
1223                 head = readl(iommu->reg + DMAR_IQH_REG);
1224                 head = ((head >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1225                 head |= 1;
1226                 tail = readl(iommu->reg + DMAR_IQT_REG);
1227                 tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1228
1229                 writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
1230
1231                 do {
1232                         if (qi->desc_status[head] == QI_IN_USE)
1233                                 qi->desc_status[head] = QI_ABORT;
1234                         head = (head - 2 + QI_LENGTH) % QI_LENGTH;
1235                 } while (head != tail);
1236
1237                 if (qi->desc_status[wait_index] == QI_ABORT)
1238                         return -EAGAIN;
1239         }
1240
1241         if (fault & DMA_FSTS_ICE)
1242                 writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
1243
1244         return 0;
1245 }
1246
1247 /*
1248  * Function to submit invalidation descriptors of all types to the queued
1249  * invalidation interface (QI). Multiple descriptors can be submitted at a
1250  * time; a wait descriptor is appended to each submission to ensure that the
1251  * hardware has completed the invalidation before returning. Wait descriptors
1252  * can be part of the submission, but they will not be polled for completion.
1253  */
1254 int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
1255                    unsigned int count, unsigned long options)
1256 {
1257         struct q_inval *qi = iommu->qi;
1258         struct qi_desc wait_desc;
1259         int wait_index, index;
1260         unsigned long flags;
1261         int offset, shift;
1262         int rc, i;
1263
1264         if (!qi)
1265                 return 0;
1266
1267 restart:
1268         rc = 0;
1269
1270         raw_spin_lock_irqsave(&qi->q_lock, flags);
1271         /*
1272          * Check if we have enough empty slots in the queue to submit,
1273          * the calculation is based on:
1274          * # of desc + 1 wait desc + 1 space between head and tail
1275          */
1276         while (qi->free_cnt < count + 2) {
1277                 raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1278                 cpu_relax();
1279                 raw_spin_lock_irqsave(&qi->q_lock, flags);
1280         }
1281
1282         index = qi->free_head;
1283         wait_index = (index + count) % QI_LENGTH;
1284         shift = qi_shift(iommu);
1285
1286         for (i = 0; i < count; i++) {
1287                 offset = ((index + i) % QI_LENGTH) << shift;
1288                 memcpy(qi->desc + offset, &desc[i], 1 << shift);
1289                 qi->desc_status[(index + i) % QI_LENGTH] = QI_IN_USE;
1290         }
1291         qi->desc_status[wait_index] = QI_IN_USE;
1292
1293         wait_desc.qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
1294                         QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
1295         if (options & QI_OPT_WAIT_DRAIN)
1296                 wait_desc.qw0 |= QI_IWD_PRQ_DRAIN;
1297         wait_desc.qw1 = virt_to_phys(&qi->desc_status[wait_index]);
1298         wait_desc.qw2 = 0;
1299         wait_desc.qw3 = 0;
1300
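        /*
         * The wait descriptor asks the hardware to write QI_DONE into
         * desc_status[wait_index] once every descriptor ahead of it has been
         * processed; the polling loop below spins on that status word.
         */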
1301         offset = wait_index << shift;
1302         memcpy(qi->desc + offset, &wait_desc, 1 << shift);
1303
1304         qi->free_head = (qi->free_head + count + 1) % QI_LENGTH;
1305         qi->free_cnt -= count + 1;
1306
1307         /*
1308          * update the HW tail register indicating the presence of
1309          * new descriptors.
1310          */
1311         writel(qi->free_head << shift, iommu->reg + DMAR_IQT_REG);
1312
1313         while (qi->desc_status[wait_index] != QI_DONE) {
1314                 /*
1315                  * We leave interrupts disabled to prevent the interrupt
1316                  * context from queueing another cmd while a cmd is already
1317                  * submitted and waiting for completion on this CPU. This
1318                  * avoids a deadlock where the interrupt context could wait
1319                  * indefinitely for free slots in the queue.
1320                  */
1321                 rc = qi_check_fault(iommu, index, wait_index);
1322                 if (rc)
1323                         break;
1324
1325                 raw_spin_unlock(&qi->q_lock);
1326                 cpu_relax();
1327                 raw_spin_lock(&qi->q_lock);
1328         }
1329
1330         for (i = 0; i < count; i++)
1331                 qi->desc_status[(index + i) % QI_LENGTH] = QI_DONE;
1332
1333         reclaim_free_desc(qi);
1334         raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1335
1336         if (rc == -EAGAIN)
1337                 goto restart;
1338
1339         return rc;
1340 }
1341
1342 /*
1343  * Flush the global interrupt entry cache.
1344  */
1345 void qi_global_iec(struct intel_iommu *iommu)
1346 {
1347         struct qi_desc desc;
1348
1349         desc.qw0 = QI_IEC_TYPE;
1350         desc.qw1 = 0;
1351         desc.qw2 = 0;
1352         desc.qw3 = 0;
1353
1354         /* should never fail */
1355         qi_submit_sync(iommu, &desc, 1, 0);
1356 }
1357
1358 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
1359                       u64 type)
1360 {
1361         struct qi_desc desc;
1362
1363         desc.qw0 = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
1364                         | QI_CC_GRAN(type) | QI_CC_TYPE;
1365         desc.qw1 = 0;
1366         desc.qw2 = 0;
1367         desc.qw3 = 0;
1368
1369         qi_submit_sync(iommu, &desc, 1, 0);
1370 }
1371
1372 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
1373                     unsigned int size_order, u64 type)
1374 {
1375         u8 dw = 0, dr = 0;
1376
1377         struct qi_desc desc;
1378         int ih = 0;
1379
1380         if (cap_write_drain(iommu->cap))
1381                 dw = 1;
1382
1383         if (cap_read_drain(iommu->cap))
1384                 dr = 1;
1385
1386         desc.qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
1387                 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
1388         desc.qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
1389                 | QI_IOTLB_AM(size_order);
1390         desc.qw2 = 0;
1391         desc.qw3 = 0;
1392
1393         qi_submit_sync(iommu, &desc, 1, 0);
1394 }
1395
1396 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1397                         u16 qdep, u64 addr, unsigned mask)
1398 {
1399         struct qi_desc desc;
1400
1401         if (mask) {
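                /*
                 * With the S bit set, the position of the least significant
                 * zero bit in the address encodes the range: set all bits
                 * below (VTD_PAGE_SHIFT + mask - 1) so that 2^mask pages are
                 * covered (same encoding as qi_flush_dev_iotlb_pasid() below).
                 */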
1402                 addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
1403                 desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
1404         } else
1405                 desc.qw1 = QI_DEV_IOTLB_ADDR(addr);
1406
1407         if (qdep >= QI_DEV_IOTLB_MAX_INVS)
1408                 qdep = 0;
1409
1410         desc.qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
1411                    QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid);
1412         desc.qw2 = 0;
1413         desc.qw3 = 0;
1414
1415         qi_submit_sync(iommu, &desc, 1, 0);
1416 }
1417
1418 /* PASID-based IOTLB invalidation */
1419 void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
1420                      unsigned long npages, bool ih)
1421 {
1422         struct qi_desc desc = {.qw2 = 0, .qw3 = 0};
1423
1424         /*
1425          * npages == -1 means a PASID-selective invalidation; a positive
1426          * value means a page-selective-within-PASID invalidation.
1427          * 0 is not a valid input.
1428          */
1429         if (WARN_ON(!npages)) {
1430                 pr_err("Invalid input npages = %ld\n", npages);
1431                 return;
1432         }
1433
1434         if (npages == -1) {
1435                 desc.qw0 = QI_EIOTLB_PASID(pasid) |
1436                                 QI_EIOTLB_DID(did) |
1437                                 QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
1438                                 QI_EIOTLB_TYPE;
1439                 desc.qw1 = 0;
1440         } else {
1441                 int mask = ilog2(__roundup_pow_of_two(npages));
1442                 unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask));
1443
1444                 if (WARN_ON_ONCE(!IS_ALIGNED(addr, align)))
1445                         addr &= ~(align - 1);
1446
1447                 desc.qw0 = QI_EIOTLB_PASID(pasid) |
1448                                 QI_EIOTLB_DID(did) |
1449                                 QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
1450                                 QI_EIOTLB_TYPE;
1451                 desc.qw1 = QI_EIOTLB_ADDR(addr) |
1452                                 QI_EIOTLB_IH(ih) |
1453                                 QI_EIOTLB_AM(mask);
1454         }
1455
1456         qi_submit_sync(iommu, &desc, 1, 0);
1457 }
1458
1459 /* PASID-based device IOTLB Invalidate */
1460 void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1461                               u32 pasid,  u16 qdep, u64 addr, unsigned int size_order)
1462 {
1463         unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1);
1464         struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
1465
1466         desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) |
1467                 QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE |
1468                 QI_DEV_IOTLB_PFSID(pfsid);
1469
1470         /*
1471          * If the S bit is 0, we only flush a single page. If the S bit is
1472          * set, the least significant zero bit indicates the invalidation
1473          * address range (VT-d spec 6.5.2.6), e.g. a zero at address bit 12
1474          * indicates an 8KB range and at bit 13 a 16KB range.
1475          * size_order == 0 means PAGE_SIZE (4KB).
1476          * Max Invs Pending (MIP) is set to 0 for now until we have DIT in
1477          * ECAP.
1478          */
1479         if (addr & GENMASK_ULL(size_order + VTD_PAGE_SHIFT, 0))
1480                 pr_warn_ratelimited("Invalidate non-aligned address %llx, order %d\n",
1481                                     addr, size_order);
1482
1483         /* Take page address */
1484         desc.qw1 = QI_DEV_EIOTLB_ADDR(addr);
1485
1486         if (size_order) {
1487                 /*
1488                  * Any 0s already present in the address bits below size_order
1489                  * would be taken as the least significant zero bit, so set
1490                  * them to 1s to avoid encoding a smaller range than desired.
1491                  */
1492                 desc.qw1 |= GENMASK_ULL(size_order + VTD_PAGE_SHIFT - 1,
1493                                         VTD_PAGE_SHIFT);
1494                 /* Clear the size_order bit so it becomes the size-encoding zero bit */
1495                 desc.qw1 &= ~mask;
1496                 /* Set the S bit to indicate flushing more than 1 page */
1497                 desc.qw1 |= QI_DEV_EIOTLB_SIZE;
1498         }
1499
1500         qi_submit_sync(iommu, &desc, 1, 0);
1501 }
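
/*
 * Worked example for the helper above (editor's note): with size_order == 2
 * (four 4KB pages), GENMASK_ULL(13, 12) first sets address bits 13:12, the
 * (1UL << 13) mask then clears bit 13, and the S bit is set. The least
 * significant zero bit of the address field is thus bit 13, which encodes a
 * 16KB invalidation range in the descriptor.
 */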
1502
1503 void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did,
1504                           u64 granu, u32 pasid)
1505 {
1506         struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
1507
1508         desc.qw0 = QI_PC_PASID(pasid) | QI_PC_DID(did) |
1509                         QI_PC_GRAN(granu) | QI_PC_TYPE;
1510         qi_submit_sync(iommu, &desc, 1, 0);
1511 }
1512
1513 /*
1514  * Disable Queued Invalidation interface.
1515  */
1516 void dmar_disable_qi(struct intel_iommu *iommu)
1517 {
1518         unsigned long flags;
1519         u32 sts;
1520         cycles_t start_time = get_cycles();
1521
1522         if (!ecap_qis(iommu->ecap))
1523                 return;
1524
1525         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1526
1527         sts = readl(iommu->reg + DMAR_GSTS_REG);
1528         if (!(sts & DMA_GSTS_QIES))
1529                 goto end;
1530
1531         /*
1532          * Give hardware a chance to complete pending invalidation requests.
1533          */
1534         while ((readl(iommu->reg + DMAR_IQT_REG) !=
1535                 readl(iommu->reg + DMAR_IQH_REG)) &&
1536                 (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
1537                 cpu_relax();
1538
1539         iommu->gcmd &= ~DMA_GCMD_QIE;
1540         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1541
1542         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
1543                       !(sts & DMA_GSTS_QIES), sts);
1544 end:
1545         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1546 }
1547
1548 /*
1549  * Enable queued invalidation.
1550  */
1551 static void __dmar_enable_qi(struct intel_iommu *iommu)
1552 {
1553         u32 sts;
1554         unsigned long flags;
1555         struct q_inval *qi = iommu->qi;
1556         u64 val = virt_to_phys(qi->desc);
1557
1558         qi->free_head = qi->free_tail = 0;
1559         qi->free_cnt = QI_LENGTH;
1560
1561         /*
1562          * Set DW=1 (256-bit descriptors) and QS=1 (two-page queue) in
1563          * IQA_REG when the Scalable Mode Translation capability is present.
1564          */
1565         if (ecap_smts(iommu->ecap))
1566                 val |= (1 << 11) | 1;
1567
1568         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1569
1570         /* write zero to the tail reg */
1571         writel(0, iommu->reg + DMAR_IQT_REG);
1572
1573         dmar_writeq(iommu->reg + DMAR_IQA_REG, val);
1574
1575         iommu->gcmd |= DMA_GCMD_QIE;
1576         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1577
1578         /* Make sure hardware completes it */
1579         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
1580
1581         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1582 }
1583
1584 /*
1585  * Enable Queued Invalidation interface. This is a must to support
1586  * interrupt-remapping. Also used by DMA-remapping, which replaces
1587  * register based IOTLB invalidation.
1588  */
1589 int dmar_enable_qi(struct intel_iommu *iommu)
1590 {
1591         struct q_inval *qi;
1592         struct page *desc_page;
1593
1594         if (!ecap_qis(iommu->ecap))
1595                 return -ENOENT;
1596
1597         /*
1598          * Queued invalidation is already set up and enabled.
1599          */
1600         if (iommu->qi)
1601                 return 0;
1602
1603         iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1604         if (!iommu->qi)
1605                 return -ENOMEM;
1606
1607         qi = iommu->qi;
1608
1609         /*
1610          * Need two pages to accommodate 256 descriptors of 256 bits each
1611          * if the remapping hardware supports scalable mode translation.
1612          */
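        /*
         * Editor's note: QI_LENGTH (256) descriptors of 32 bytes each need
         * 8KB, i.e. an order-1 allocation, while legacy 128-bit descriptors
         * fit in a single 4KB page, hence the !!ecap_smts() order below.
         */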
1613         desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
1614                                      !!ecap_smts(iommu->ecap));
1615         if (!desc_page) {
1616                 kfree(qi);
1617                 iommu->qi = NULL;
1618                 return -ENOMEM;
1619         }
1620
1621         qi->desc = page_address(desc_page);
1622
1623         qi->desc_status = kcalloc(QI_LENGTH, sizeof(int), GFP_ATOMIC);
1624         if (!qi->desc_status) {
1625                 free_pages((unsigned long) qi->desc, !!ecap_smts(iommu->ecap));
1626                 kfree(qi);
1627                 iommu->qi = NULL;
1628                 return -ENOMEM;
1629         }
1630
1631         raw_spin_lock_init(&qi->q_lock);
1632
1633         __dmar_enable_qi(iommu);
1634
1635         return 0;
1636 }
1637
1638 /* IOMMU interrupt handling. Most of it is MSI-like. */
1639
1640 enum faulttype {
1641         DMA_REMAP,
1642         INTR_REMAP,
1643         UNKNOWN,
1644 };
1645
1646 static const char * const dma_remap_fault_reasons[] =
1647 {
1648         "Software",
1649         "Present bit in root entry is clear",
1650         "Present bit in context entry is clear",
1651         "Invalid context entry",
1652         "Access beyond MGAW",
1653         "PTE Write access is not set",
1654         "PTE Read access is not set",
1655         "Next page table ptr is invalid",
1656         "Root table address invalid",
1657         "Context table ptr is invalid",
1658         "non-zero reserved fields in RTP",
1659         "non-zero reserved fields in CTP",
1660         "non-zero reserved fields in PTE",
1661         "PCE for translation request specifies blocking",
1662 };
1663
1664 static const char * const dma_remap_sm_fault_reasons[] = {
1665         "SM: Invalid Root Table Address",
1666         "SM: TTM 0 for request with PASID",
1667         "SM: TTM 0 for page group request",
1668         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x33-0x37 */
1669         "SM: Error attempting to access Root Entry",
1670         "SM: Present bit in Root Entry is clear",
1671         "SM: Non-zero reserved field set in Root Entry",
1672         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x3B-0x3F */
1673         "SM: Error attempting to access Context Entry",
1674         "SM: Present bit in Context Entry is clear",
1675         "SM: Non-zero reserved field set in the Context Entry",
1676         "SM: Invalid Context Entry",
1677         "SM: DTE field in Context Entry is clear",
1678         "SM: PASID Enable field in Context Entry is clear",
1679         "SM: PASID is larger than the max in Context Entry",
1680         "SM: PRE field in Context-Entry is clear",
1681         "SM: RID_PASID field error in Context-Entry",
1682         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x49-0x4F */
1683         "SM: Error attempting to access the PASID Directory Entry",
1684         "SM: Present bit in Directory Entry is clear",
1685         "SM: Non-zero reserved field set in PASID Directory Entry",
1686         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x53-0x57 */
1687         "SM: Error attempting to access PASID Table Entry",
1688         "SM: Present bit in PASID Table Entry is clear",
1689         "SM: Non-zero reserved field set in PASID Table Entry",
1690         "SM: Invalid Scalable-Mode PASID Table Entry",
1691         "SM: ERE field is clear in PASID Table Entry",
1692         "SM: SRE field is clear in PASID Table Entry",
1693         "Unknown", "Unknown", /* 0x5E-0x5F */
1694         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x60-0x67 */
1695         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x68-0x6F */
1696         "SM: Error attempting to access first-level paging entry",
1697         "SM: Present bit in first-level paging entry is clear",
1698         "SM: Non-zero reserved field set in first-level paging entry",
1699         "SM: Error attempting to access FL-PML4 entry",
1700         "SM: First-level entry address beyond MGAW in Nested translation",
1701         "SM: Read permission error in FL-PML4 entry in Nested translation",
1702         "SM: Read permission error in first-level paging entry in Nested translation",
1703         "SM: Write permission error in first-level paging entry in Nested translation",
1704         "SM: Error attempting to access second-level paging entry",
1705         "SM: Read/Write permission error in second-level paging entry",
1706         "SM: Non-zero reserved field set in second-level paging entry",
1707         "SM: Invalid second-level page table pointer",
1708         "SM: A/D bit update needed in second-level entry when set up in no snoop",
1709         "Unknown", "Unknown", "Unknown", /* 0x7D-0x7F */
1710         "SM: Address in first-level translation is not canonical",
1711         "SM: U/S set 0 for first-level translation with user privilege",
1712         "SM: No execute permission for request with PASID and ER=1",
1713         "SM: Address beyond the DMA hardware max",
1714         "SM: Second-level entry address beyond the max",
1715         "SM: No write permission for Write/AtomicOp request",
1716         "SM: No read permission for Read/AtomicOp request",
1717         "SM: Invalid address-interrupt address",
1718         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x88-0x8F */
1719         "SM: A/D bit update needed in first-level entry when set up in no snoop",
1720 };
1721
1722 static const char * const irq_remap_fault_reasons[] =
1723 {
1724         "Detected reserved fields in the decoded interrupt-remapped request",
1725         "Interrupt index exceeded the interrupt-remapping table size",
1726         "Present field in the IRTE entry is clear",
1727         "Error accessing interrupt-remapping table pointed by IRTA_REG",
1728         "Detected reserved fields in the IRTE entry",
1729         "Blocked a compatibility format interrupt request",
1730         "Blocked an interrupt request due to source-id verification failure",
1731 };
1732
1733 static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1734 {
1735         if (fault_reason >= 0x20 && (fault_reason - 0x20 <
1736                                         ARRAY_SIZE(irq_remap_fault_reasons))) {
1737                 *fault_type = INTR_REMAP;
1738                 return irq_remap_fault_reasons[fault_reason - 0x20];
1739         } else if (fault_reason >= 0x30 && (fault_reason - 0x30 <
1740                         ARRAY_SIZE(dma_remap_sm_fault_reasons))) {
1741                 *fault_type = DMA_REMAP;
1742                 return dma_remap_sm_fault_reasons[fault_reason - 0x30];
1743         } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1744                 *fault_type = DMA_REMAP;
1745                 return dma_remap_fault_reasons[fault_reason];
1746         } else {
1747                 *fault_type = UNKNOWN;
1748                 return "Unknown";
1749         }
1750 }
1751
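/*
 * Decoding example (editor's note): fault_reason 0x25 resolves to
 * irq_remap_fault_reasons[5] ("Blocked a compatibility format interrupt
 * request"), 0x30 resolves to dma_remap_sm_fault_reasons[0] ("SM: Invalid
 * Root Table Address"), and anything outside the three ranges above is
 * reported as "Unknown".
 */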
1752
1753 static inline int dmar_msi_reg(struct intel_iommu *iommu, int irq)
1754 {
1755         if (iommu->irq == irq)
1756                 return DMAR_FECTL_REG;
1757         else if (iommu->pr_irq == irq)
1758                 return DMAR_PECTL_REG;
1759         else
1760                 BUG();
1761 }
1762
1763 void dmar_msi_unmask(struct irq_data *data)
1764 {
1765         struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1766         int reg = dmar_msi_reg(iommu, data->irq);
1767         unsigned long flag;
1768
1769         /* unmask it */
1770         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1771         writel(0, iommu->reg + reg);
1772         /* Read a reg to force flush the post write */
1773         readl(iommu->reg + reg);
1774         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1775 }
1776
1777 void dmar_msi_mask(struct irq_data *data)
1778 {
1779         struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1780         int reg = dmar_msi_reg(iommu, data->irq);
1781         unsigned long flag;
1782
1783         /* mask it */
1784         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1785         writel(DMA_FECTL_IM, iommu->reg + reg);
1786         /* Read a reg to force flush the post write */
1787         readl(iommu->reg + reg);
1788         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1789 }
1790
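/*
 * Editor's note: the +4/+8/+12 offsets in the two helpers below assume the
 * VT-d register layout in which the event data, address and upper-address
 * registers directly follow the corresponding control register (e.g.
 * DMAR_FEDATA_REG, DMAR_FEADDR_REG and DMAR_FEUADDR_REG after
 * DMAR_FECTL_REG), so the same code serves both the fault event and the
 * page request event interrupt.
 */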
1791 void dmar_msi_write(int irq, struct msi_msg *msg)
1792 {
1793         struct intel_iommu *iommu = irq_get_handler_data(irq);
1794         int reg = dmar_msi_reg(iommu, irq);
1795         unsigned long flag;
1796
1797         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1798         writel(msg->data, iommu->reg + reg + 4);
1799         writel(msg->address_lo, iommu->reg + reg + 8);
1800         writel(msg->address_hi, iommu->reg + reg + 12);
1801         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1802 }
1803
1804 void dmar_msi_read(int irq, struct msi_msg *msg)
1805 {
1806         struct intel_iommu *iommu = irq_get_handler_data(irq);
1807         int reg = dmar_msi_reg(iommu, irq);
1808         unsigned long flag;
1809
1810         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1811         msg->data = readl(iommu->reg + reg + 4);
1812         msg->address_lo = readl(iommu->reg + reg + 8);
1813         msg->address_hi = readl(iommu->reg + reg + 12);
1814         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1815 }
1816
1817 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1818                 u8 fault_reason, u32 pasid, u16 source_id,
1819                 unsigned long long addr)
1820 {
1821         const char *reason;
1822         int fault_type;
1823
1824         reason = dmar_get_fault_reason(fault_reason, &fault_type);
1825
1826         if (fault_type == INTR_REMAP)
1827                 pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index %llx [fault reason %02d] %s\n",
1828                         source_id >> 8, PCI_SLOT(source_id & 0xFF),
1829                         PCI_FUNC(source_id & 0xFF), addr >> 48,
1830                         fault_reason, reason);
1831         else
1832                 pr_err("[%s] Request device [%02x:%02x.%d] PASID %x fault addr %llx [fault reason %02d] %s\n",
1833                        type ? "DMA Read" : "DMA Write",
1834                        source_id >> 8, PCI_SLOT(source_id & 0xFF),
1835                        PCI_FUNC(source_id & 0xFF), pasid, addr,
1836                        fault_reason, reason);
1837         return 0;
1838 }
1839
1840 #define PRIMARY_FAULT_REG_LEN (16)
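/*
 * Editor's summary of the loop below: each 16-byte primary fault recording
 * register is consumed with three reads: the qword at offset 0 holds the
 * faulting address, the dword at offset 8 the source-id and PASID-present
 * bit, and the dword at offset 12 the F bit, request type, fault reason and
 * PASID value. Writing DMA_FRCD_F back to offset 12 clears the record.
 */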
1841 irqreturn_t dmar_fault(int irq, void *dev_id)
1842 {
1843         struct intel_iommu *iommu = dev_id;
1844         int reg, fault_index;
1845         u32 fault_status;
1846         unsigned long flag;
1847         static DEFINE_RATELIMIT_STATE(rs,
1848                                       DEFAULT_RATELIMIT_INTERVAL,
1849                                       DEFAULT_RATELIMIT_BURST);
1850
1851         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1852         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1853         if (fault_status && __ratelimit(&rs))
1854                 pr_err("DRHD: handling fault status reg %x\n", fault_status);
1855
1856         /* TBD: ignore advanced fault log currently */
1857         if (!(fault_status & DMA_FSTS_PPF))
1858                 goto unlock_exit;
1859
1860         fault_index = dma_fsts_fault_record_index(fault_status);
1861         reg = cap_fault_reg_offset(iommu->cap);
1862         while (1) {
1863                 /* Disable printing, simply clear the fault when ratelimited */
1864                 bool ratelimited = !__ratelimit(&rs);
1865                 u8 fault_reason;
1866                 u16 source_id;
1867                 u64 guest_addr;
1868                 u32 pasid;
1869                 int type;
1870                 u32 data;
1871                 bool pasid_present;
1872
1873                 /* highest 32 bits */
1874                 data = readl(iommu->reg + reg +
1875                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1876                 if (!(data & DMA_FRCD_F))
1877                         break;
1878
1879                 if (!ratelimited) {
1880                         fault_reason = dma_frcd_fault_reason(data);
1881                         type = dma_frcd_type(data);
1882
1883                         pasid = dma_frcd_pasid_value(data);
1884                         data = readl(iommu->reg + reg +
1885                                      fault_index * PRIMARY_FAULT_REG_LEN + 8);
1886                         source_id = dma_frcd_source_id(data);
1887
1888                         pasid_present = dma_frcd_pasid_present(data);
1889                         guest_addr = dmar_readq(iommu->reg + reg +
1890                                         fault_index * PRIMARY_FAULT_REG_LEN);
1891                         guest_addr = dma_frcd_page_addr(guest_addr);
1892                 }
1893
1894                 /* clear the fault */
1895                 writel(DMA_FRCD_F, iommu->reg + reg +
1896                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
1897
1898                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1899
1900                 if (!ratelimited)
1901                         /* Use PASID -1 if the PASID is not present */
1902                         dmar_fault_do_one(iommu, type, fault_reason,
1903                                           pasid_present ? pasid : -1,
1904                                           source_id, guest_addr);
1905
1906                 fault_index++;
1907                 if (fault_index >= cap_num_fault_regs(iommu->cap))
1908                         fault_index = 0;
1909                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1910         }
1911
1912         writel(DMA_FSTS_PFO | DMA_FSTS_PPF | DMA_FSTS_PRO,
1913                iommu->reg + DMAR_FSTS_REG);
1914
1915 unlock_exit:
1916         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1917         return IRQ_HANDLED;
1918 }
1919
1920 int dmar_set_interrupt(struct intel_iommu *iommu)
1921 {
1922         int irq, ret;
1923
1924         /*
1925          * Check if the fault interrupt is already initialized.
1926          */
1927         if (iommu->irq)
1928                 return 0;
1929
1930         irq = dmar_alloc_hwirq(iommu->seq_id, iommu->node, iommu);
1931         if (irq > 0) {
1932                 iommu->irq = irq;
1933         } else {
1934                 pr_err("No free IRQ vectors\n");
1935                 return -EINVAL;
1936         }
1937
1938         ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
1939         if (ret)
1940                 pr_err("Can't request irq\n");
1941         return ret;
1942 }
1943
1944 int __init enable_drhd_fault_handling(void)
1945 {
1946         struct dmar_drhd_unit *drhd;
1947         struct intel_iommu *iommu;
1948
1949         /*
1950          * Enable fault control interrupt.
1951          */
1952         for_each_iommu(iommu, drhd) {
1953                 u32 fault_status;
1954                 int ret = dmar_set_interrupt(iommu);
1955
1956                 if (ret) {
1957                         pr_err("DRHD %Lx: failed to enable fault interrupt, ret %d\n",
1958                                (unsigned long long)drhd->reg_base_addr, ret);
1959                         return -1;
1960                 }
1961
1962                 /*
1963                  * Clear any previous faults.
1964                  */
1965                 dmar_fault(iommu->irq, iommu);
1966                 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1967                 writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1968         }
1969
1970         return 0;
1971 }
1972
1973 /*
1974  * Re-enable Queued Invalidation interface.
1975  */
1976 int dmar_reenable_qi(struct intel_iommu *iommu)
1977 {
1978         if (!ecap_qis(iommu->ecap))
1979                 return -ENOENT;
1980
1981         if (!iommu->qi)
1982                 return -ENOENT;
1983
1984         /*
1985          * First disable queued invalidation.
1986          */
1987         dmar_disable_qi(iommu);
1988         /*
1989          * Then enable queued invalidation again. Since there is no pending
1990          * invalidation requests now, it's safe to re-enable queued
1991          * invalidation.
1992          */
1993         __dmar_enable_qi(iommu);
1994
1995         return 0;
1996 }
1997
1998 /*
1999  * Check interrupt remapping support in DMAR table description.
2000  */
2001 int __init dmar_ir_support(void)
2002 {
2003         struct acpi_table_dmar *dmar = (struct acpi_table_dmar *)dmar_tbl;
2004
2005         if (!dmar)
2006                 return 0;
2007         return dmar->flags & 0x1; /* bit 0: INTR_REMAP */
2008 }
2009
2010 /* Check whether DMAR units are in use */
2011 static inline bool dmar_in_use(void)
2012 {
2013         return irq_remapping_enabled || intel_iommu_enabled;
2014 }
2015
2016 static int __init dmar_free_unused_resources(void)
2017 {
2018         struct dmar_drhd_unit *dmaru, *dmaru_n;
2019
2020         if (dmar_in_use())
2021                 return 0;
2022
2023         if (dmar_dev_scope_status != 1 && !list_empty(&dmar_drhd_units))
2024                 bus_unregister_notifier(&pci_bus_type, &dmar_pci_bus_nb);
2025
2026         down_write(&dmar_global_lock);
2027         list_for_each_entry_safe(dmaru, dmaru_n, &dmar_drhd_units, list) {
2028                 list_del(&dmaru->list);
2029                 dmar_free_drhd(dmaru);
2030         }
2031         up_write(&dmar_global_lock);
2032
2033         return 0;
2034 }
2035
2036 late_initcall(dmar_free_unused_resources);
2037 IOMMU_INIT_POST(detect_intel_iommu);
2038
2039 /*
2040  * DMAR Hotplug Support
2041  * For more details, please refer to Intel(R) Virtualization Technology
2042  * for Directed I/O Architecture Specification, Rev 2.2, Section 8.8
2043  * "Remapping Hardware Unit Hot Plug".
2044  */
2045 static guid_t dmar_hp_guid =
2046         GUID_INIT(0xD8C1A3A6, 0xBE9B, 0x4C9B,
2047                   0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF);
2048
2049 /*
2050  * Currently there's only one revision and the BIOS will not check the
2051  * revision id, so use 0 for safety.
2052  */
2053 #define DMAR_DSM_REV_ID                 0
2054 #define DMAR_DSM_FUNC_DRHD              1
2055 #define DMAR_DSM_FUNC_ATSR              2
2056 #define DMAR_DSM_FUNC_RHSA              3
2057
2058 static inline bool dmar_detect_dsm(acpi_handle handle, int func)
2059 {
2060         return acpi_check_dsm(handle, &dmar_hp_guid, DMAR_DSM_REV_ID, 1 << func);
2061 }
2062
2063 static int dmar_walk_dsm_resource(acpi_handle handle, int func,
2064                                   dmar_res_handler_t handler, void *arg)
2065 {
2066         int ret = -ENODEV;
2067         union acpi_object *obj;
2068         struct acpi_dmar_header *start;
2069         struct dmar_res_callback callback;
2070         static int res_type[] = {
2071                 [DMAR_DSM_FUNC_DRHD] = ACPI_DMAR_TYPE_HARDWARE_UNIT,
2072                 [DMAR_DSM_FUNC_ATSR] = ACPI_DMAR_TYPE_ROOT_ATS,
2073                 [DMAR_DSM_FUNC_RHSA] = ACPI_DMAR_TYPE_HARDWARE_AFFINITY,
2074         };
2075
2076         if (!dmar_detect_dsm(handle, func))
2077                 return 0;
2078
2079         obj = acpi_evaluate_dsm_typed(handle, &dmar_hp_guid, DMAR_DSM_REV_ID,
2080                                       func, NULL, ACPI_TYPE_BUFFER);
2081         if (!obj)
2082                 return -ENODEV;
2083
2084         memset(&callback, 0, sizeof(callback));
2085         callback.cb[res_type[func]] = handler;
2086         callback.arg[res_type[func]] = arg;
2087         start = (struct acpi_dmar_header *)obj->buffer.pointer;
2088         ret = dmar_walk_remapping_entries(start, obj->buffer.length, &callback);
2089
2090         ACPI_FREE(obj);
2091
2092         return ret;
2093 }
2094
2095 static int dmar_hp_add_drhd(struct acpi_dmar_header *header, void *arg)
2096 {
2097         int ret;
2098         struct dmar_drhd_unit *dmaru;
2099
2100         dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2101         if (!dmaru)
2102                 return -ENODEV;
2103
2104         ret = dmar_ir_hotplug(dmaru, true);
2105         if (ret == 0)
2106                 ret = dmar_iommu_hotplug(dmaru, true);
2107
2108         return ret;
2109 }
2110
2111 static int dmar_hp_remove_drhd(struct acpi_dmar_header *header, void *arg)
2112 {
2113         int i, ret;
2114         struct device *dev;
2115         struct dmar_drhd_unit *dmaru;
2116
2117         dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2118         if (!dmaru)
2119                 return 0;
2120
2121         /*
2122          * All PCI devices managed by this unit should have been destroyed.
2123          */
2124         if (!dmaru->include_all && dmaru->devices && dmaru->devices_cnt) {
2125                 for_each_active_dev_scope(dmaru->devices,
2126                                           dmaru->devices_cnt, i, dev)
2127                         return -EBUSY;
2128         }
2129
2130         ret = dmar_ir_hotplug(dmaru, false);
2131         if (ret == 0)
2132                 ret = dmar_iommu_hotplug(dmaru, false);
2133
2134         return ret;
2135 }
2136
2137 static int dmar_hp_release_drhd(struct acpi_dmar_header *header, void *arg)
2138 {
2139         struct dmar_drhd_unit *dmaru;
2140
2141         dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2142         if (dmaru) {
2143                 list_del_rcu(&dmaru->list);
2144                 synchronize_rcu();
2145                 dmar_free_drhd(dmaru);
2146         }
2147
2148         return 0;
2149 }
2150
2151 static int dmar_hotplug_insert(acpi_handle handle)
2152 {
2153         int ret;
2154         int drhd_count = 0;
2155
2156         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2157                                      &dmar_validate_one_drhd, (void *)1);
2158         if (ret)
2159                 goto out;
2160
2161         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2162                                      &dmar_parse_one_drhd, (void *)&drhd_count);
2163         if (ret == 0 && drhd_count == 0) {
2164                 pr_warn(FW_BUG "No DRHD structures in buffer returned by _DSM method\n");
2165                 goto out;
2166         } else if (ret) {
2167                 goto release_drhd;
2168         }
2169
2170         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_RHSA,
2171                                      &dmar_parse_one_rhsa, NULL);
2172         if (ret)
2173                 goto release_drhd;
2174
2175         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2176                                      &dmar_parse_one_atsr, NULL);
2177         if (ret)
2178                 goto release_atsr;
2179
2180         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2181                                      &dmar_hp_add_drhd, NULL);
2182         if (!ret)
2183                 return 0;
2184
2185         dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2186                                &dmar_hp_remove_drhd, NULL);
2187 release_atsr:
2188         dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2189                                &dmar_release_one_atsr, NULL);
2190 release_drhd:
2191         dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2192                                &dmar_hp_release_drhd, NULL);
2193 out:
2194         return ret;
2195 }
2196
2197 static int dmar_hotplug_remove(acpi_handle handle)
2198 {
2199         int ret;
2200
2201         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2202                                      &dmar_check_one_atsr, NULL);
2203         if (ret)
2204                 return ret;
2205
2206         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2207                                      &dmar_hp_remove_drhd, NULL);
2208         if (ret == 0) {
2209                 WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2210                                                &dmar_release_one_atsr, NULL));
2211                 WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2212                                                &dmar_hp_release_drhd, NULL));
2213         } else {
2214                 dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2215                                        &dmar_hp_add_drhd, NULL);
2216         }
2217
2218         return ret;
2219 }
2220
2221 static acpi_status dmar_get_dsm_handle(acpi_handle handle, u32 lvl,
2222                                        void *context, void **retval)
2223 {
2224         acpi_handle *phdl = retval;
2225
2226         if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2227                 *phdl = handle;
2228                 return AE_CTRL_TERMINATE;
2229         }
2230
2231         return AE_OK;
2232 }
2233
2234 static int dmar_device_hotplug(acpi_handle handle, bool insert)
2235 {
2236         int ret;
2237         acpi_handle tmp = NULL;
2238         acpi_status status;
2239
2240         if (!dmar_in_use())
2241                 return 0;
2242
2243         if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2244                 tmp = handle;
2245         } else {
2246                 status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle,
2247                                              ACPI_UINT32_MAX,
2248                                              dmar_get_dsm_handle,
2249                                              NULL, NULL, &tmp);
2250                 if (ACPI_FAILURE(status)) {
2251                         pr_warn("Failed to locate _DSM method.\n");
2252                         return -ENXIO;
2253                 }
2254         }
2255         if (tmp == NULL)
2256                 return 0;
2257
2258         down_write(&dmar_global_lock);
2259         if (insert)
2260                 ret = dmar_hotplug_insert(tmp);
2261         else
2262                 ret = dmar_hotplug_remove(tmp);
2263         up_write(&dmar_global_lock);
2264
2265         return ret;
2266 }
2267
2268 int dmar_device_add(acpi_handle handle)
2269 {
2270         return dmar_device_hotplug(handle, true);
2271 }
2272
2273 int dmar_device_remove(acpi_handle handle)
2274 {
2275         return dmar_device_hotplug(handle, false);
2276 }
2277
2278 /*
2279  * dmar_platform_optin - Is %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in DMAR table
2280  *
2281  * Returns true if the platform has %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in
2282  * the ACPI DMAR table. This means that the platform boot firmware has made
2283  * sure no device can issue DMA outside of RMRR regions.
2284  */
2285 bool dmar_platform_optin(void)
2286 {
2287         struct acpi_table_dmar *dmar;
2288         acpi_status status;
2289         bool ret;
2290
2291         status = acpi_get_table(ACPI_SIG_DMAR, 0,
2292                                 (struct acpi_table_header **)&dmar);
2293         if (ACPI_FAILURE(status))
2294                 return false;
2295
2296         ret = !!(dmar->flags & DMAR_PLATFORM_OPT_IN);
2297         acpi_put_table((struct acpi_table_header *)dmar);
2298
2299         return ret;
2300 }
2301 EXPORT_SYMBOL_GPL(dmar_platform_optin);