drivers/iommu/intel/dmar.c (linux-2.6-microblaze.git)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2006, Intel Corporation.
4  *
5  * Copyright (C) 2006-2008 Intel Corporation
6  * Author: Ashok Raj <ashok.raj@intel.com>
7  * Author: Shaohua Li <shaohua.li@intel.com>
8  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
9  *
10  * This file implements early detection/parsing of Remapping Devices
11  * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
12  * tables.
13  *
14  * These routines are used by both DMA-remapping and Interrupt-remapping
15  */
16
17 #define pr_fmt(fmt)     "DMAR: " fmt
18
19 #include <linux/pci.h>
20 #include <linux/dmar.h>
21 #include <linux/iova.h>
22 #include <linux/intel-iommu.h>
23 #include <linux/timer.h>
24 #include <linux/irq.h>
25 #include <linux/interrupt.h>
26 #include <linux/tboot.h>
27 #include <linux/dmi.h>
28 #include <linux/slab.h>
29 #include <linux/iommu.h>
30 #include <linux/numa.h>
31 #include <linux/limits.h>
32 #include <asm/irq_remapping.h>
33 #include <asm/iommu_table.h>
34
35 #include "../irq_remapping.h"
36
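/*
 * Per-type dispatch table used when walking the ACPI DMAR table: one handler
 * and one opaque argument slot for each ACPI DMAR structure type.  print_entry
 * dumps each record as it is visited; ignore_unhandled suppresses the error
 * for known types that have no handler.  See dmar_walk_remapping_entries().
 */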
37 typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *);
38 struct dmar_res_callback {
39         dmar_res_handler_t      cb[ACPI_DMAR_TYPE_RESERVED];
40         void                    *arg[ACPI_DMAR_TYPE_RESERVED];
41         bool                    ignore_unhandled;
42         bool                    print_entry;
43 };
44
45 /*
46  * Assumptions:
47  * 1) The hotplug framework guarantees that a DMAR unit will be hot-added
48  *    before the I/O devices managed by that unit.
49  * 2) The hotplug framework guarantees that a DMAR unit will be hot-removed
50  *    after the I/O devices managed by that unit.
51  * 3) Hotplug events are rare.
52  *
53  * Locking rules for DMA and interrupt remapping related global data structures:
54  * 1) Use dmar_global_lock in process context
55  * 2) Use RCU in interrupt context
56  */
57 DECLARE_RWSEM(dmar_global_lock);
58 LIST_HEAD(dmar_drhd_units);
59
60 struct acpi_table_header * __initdata dmar_tbl;
61 static int dmar_dev_scope_status = 1;
62 static unsigned long dmar_seq_ids[BITS_TO_LONGS(DMAR_UNITS_SUPPORTED)];
63
64 static int alloc_iommu(struct dmar_drhd_unit *drhd);
65 static void free_iommu(struct intel_iommu *iommu);
66
67 extern const struct iommu_ops intel_iommu_ops;
68
69 static void dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
70 {
71         /*
72          * Add INCLUDE_ALL units at the tail, so that a scan of the list
73          * finds them at the very end.
74          */
75         if (drhd->include_all)
76                 list_add_tail_rcu(&drhd->list, &dmar_drhd_units);
77         else
78                 list_add_rcu(&drhd->list, &dmar_drhd_units);
79 }
80
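/*
 * Count the NAMESPACE, ENDPOINT and BRIDGE device scope entries between
 * @start and @end and allocate a matching array of struct dmar_dev_scope.
 * IOAPIC and HPET entries are skipped silently; any other entry type gets a
 * warning.  Returns NULL if no countable entries are found (*cnt is left at
 * 0) or if the allocation fails.
 */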
81 void *dmar_alloc_dev_scope(void *start, void *end, int *cnt)
82 {
83         struct acpi_dmar_device_scope *scope;
84
85         *cnt = 0;
86         while (start < end) {
87                 scope = start;
88                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_NAMESPACE ||
89                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
90                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
91                         (*cnt)++;
92                 else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC &&
93                         scope->entry_type != ACPI_DMAR_SCOPE_TYPE_HPET) {
94                         pr_warn("Unsupported device scope\n");
95                 }
96                 start += scope->length;
97         }
98         if (*cnt == 0)
99                 return NULL;
100
101         return kcalloc(*cnt, sizeof(struct dmar_dev_scope), GFP_KERNEL);
102 }
103
104 void dmar_free_dev_scope(struct dmar_dev_scope **devices, int *cnt)
105 {
106         int i;
107         struct device *tmp_dev;
108
109         if (*devices && *cnt) {
110                 for_each_active_dev_scope(*devices, *cnt, i, tmp_dev)
111                         put_device(tmp_dev);
112                 kfree(*devices);
113         }
114
115         *devices = NULL;
116         *cnt = 0;
117 }
118
119 /* Optimize out kzalloc()/kfree() for normal cases */
120 static char dmar_pci_notify_info_buf[64];
121
122 static struct dmar_pci_notify_info *
123 dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event)
124 {
125         int level = 0;
126         size_t size;
127         struct pci_dev *tmp;
128         struct dmar_pci_notify_info *info;
129
130         BUG_ON(dev->is_virtfn);
131
132         /*
133          * Ignore devices that have a domain number higher than what can
134          * be looked up in DMAR, e.g. VMD subdevices with domain 0x10000
135          */
136         if (pci_domain_nr(dev->bus) > U16_MAX)
137                 return NULL;
138
139         /* Only generate path[] for device addition event */
140         if (event == BUS_NOTIFY_ADD_DEVICE)
141                 for (tmp = dev; tmp; tmp = tmp->bus->self)
142                         level++;
143
144         size = struct_size(info, path, level);
145         if (size <= sizeof(dmar_pci_notify_info_buf)) {
146                 info = (struct dmar_pci_notify_info *)dmar_pci_notify_info_buf;
147         } else {
148                 info = kzalloc(size, GFP_KERNEL);
149                 if (!info) {
150                         pr_warn("Out of memory when allocating notify_info "
151                                 "for %s.\n", pci_name(dev));
152                         if (dmar_dev_scope_status == 0)
153                                 dmar_dev_scope_status = -ENOMEM;
154                         return NULL;
155                 }
156         }
157
158         info->event = event;
159         info->dev = dev;
160         info->seg = pci_domain_nr(dev->bus);
161         info->level = level;
162         if (event == BUS_NOTIFY_ADD_DEVICE) {
163                 for (tmp = dev; tmp; tmp = tmp->bus->self) {
164                         level--;
165                         info->path[level].bus = tmp->bus->number;
166                         info->path[level].device = PCI_SLOT(tmp->devfn);
167                         info->path[level].function = PCI_FUNC(tmp->devfn);
168                         if (pci_is_root_bus(tmp->bus))
169                                 info->bus = tmp->bus->number;
170                 }
171         }
172
173         return info;
174 }
175
176 static inline void dmar_free_pci_notify_info(struct dmar_pci_notify_info *info)
177 {
178         if ((void *)info != dmar_pci_notify_info_buf)
179                 kfree(info);
180 }
181
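/*
 * Match the PCI path recorded in @info against the ACPI device scope path
 * @path (@count entries) starting at @bus.  If the exact match fails and the
 * scope describes a single path element, fall back to comparing it against
 * the last element of the device's path, to work around firmware that emits
 * broken RMRR entries.
 */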
182 static bool dmar_match_pci_path(struct dmar_pci_notify_info *info, int bus,
183                                 struct acpi_dmar_pci_path *path, int count)
184 {
185         int i;
186
187         if (info->bus != bus)
188                 goto fallback;
189         if (info->level != count)
190                 goto fallback;
191
192         for (i = 0; i < count; i++) {
193                 if (path[i].device != info->path[i].device ||
194                     path[i].function != info->path[i].function)
195                         goto fallback;
196         }
197
198         return true;
199
200 fallback:
201
202         if (count != 1)
203                 return false;
204
205         i = info->level - 1;
206         if (bus              == info->path[i].bus &&
207             path[0].device   == info->path[i].device &&
208             path[0].function == info->path[i].function) {
209                 pr_info(FW_BUG "RMRR entry for device %02x:%02x.%x is broken - applying workaround\n",
210                         bus, path[0].device, path[0].function);
211                 return true;
212         }
213
214         return false;
215 }
216
217 /* Return: > 0 if match found, 0 if no match found, < 0 if an error occurs */
218 int dmar_insert_dev_scope(struct dmar_pci_notify_info *info,
219                           void *start, void *end, u16 segment,
220                           struct dmar_dev_scope *devices,
221                           int devices_cnt)
222 {
223         int i, level;
224         struct device *tmp, *dev = &info->dev->dev;
225         struct acpi_dmar_device_scope *scope;
226         struct acpi_dmar_pci_path *path;
227
228         if (segment != info->seg)
229                 return 0;
230
231         for (; start < end; start += scope->length) {
232                 scope = start;
233                 if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
234                     scope->entry_type != ACPI_DMAR_SCOPE_TYPE_BRIDGE)
235                         continue;
236
237                 path = (struct acpi_dmar_pci_path *)(scope + 1);
238                 level = (scope->length - sizeof(*scope)) / sizeof(*path);
239                 if (!dmar_match_pci_path(info, scope->bus, path, level))
240                         continue;
241
242                 /*
243                  * We expect devices with endpoint scope to have normal PCI
244                  * headers, and devices with bridge scope to have bridge PCI
245                  * headers.  However PCI NTB devices may be listed in the
246                  * DMAR table with bridge scope, even though they have a
247                  * normal PCI header.  NTB devices are identified by class
248                  * "BRIDGE_OTHER" (0680h) - we don't declare a scope mismatch
249                  * for this special case.
250                  */
251                 if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
252                      info->dev->hdr_type != PCI_HEADER_TYPE_NORMAL) ||
253                     (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE &&
254                      (info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
255                       info->dev->class >> 16 != PCI_BASE_CLASS_BRIDGE))) {
256                         pr_warn("Device scope type does not match for %s\n",
257                                 pci_name(info->dev));
258                         return -EINVAL;
259                 }
260
261                 for_each_dev_scope(devices, devices_cnt, i, tmp)
262                         if (tmp == NULL) {
263                                 devices[i].bus = info->dev->bus->number;
264                                 devices[i].devfn = info->dev->devfn;
265                                 rcu_assign_pointer(devices[i].dev,
266                                                    get_device(dev));
267                                 return 1;
268                         }
269                 BUG_ON(i >= devices_cnt);
270         }
271
272         return 0;
273 }
274
275 int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, u16 segment,
276                           struct dmar_dev_scope *devices, int count)
277 {
278         int index;
279         struct device *tmp;
280
281         if (info->seg != segment)
282                 return 0;
283
284         for_each_active_dev_scope(devices, count, index, tmp)
285                 if (tmp == &info->dev->dev) {
286                         RCU_INIT_POINTER(devices[index].dev, NULL);
287                         synchronize_rcu();
288                         put_device(tmp);
289                         return 1;
290                 }
291
292         return 0;
293 }
294
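/*
 * A new physical function showed up: add it to the device scope of the
 * matching DRHD unit (INCLUDE_ALL units need no explicit scope), let the
 * RMRR/ATSR code update its scopes via dmar_iommu_notify_scope_dev(), and
 * finally hook the device up to interrupt remapping.  Failures are latched
 * in dmar_dev_scope_status.
 */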
295 static int dmar_pci_bus_add_dev(struct dmar_pci_notify_info *info)
296 {
297         int ret = 0;
298         struct dmar_drhd_unit *dmaru;
299         struct acpi_dmar_hardware_unit *drhd;
300
301         for_each_drhd_unit(dmaru) {
302                 if (dmaru->include_all)
303                         continue;
304
305                 drhd = container_of(dmaru->hdr,
306                                     struct acpi_dmar_hardware_unit, header);
307                 ret = dmar_insert_dev_scope(info, (void *)(drhd + 1),
308                                 ((void *)drhd) + drhd->header.length,
309                                 dmaru->segment,
310                                 dmaru->devices, dmaru->devices_cnt);
311                 if (ret)
312                         break;
313         }
314         if (ret >= 0)
315                 ret = dmar_iommu_notify_scope_dev(info);
316         if (ret < 0 && dmar_dev_scope_status == 0)
317                 dmar_dev_scope_status = ret;
318
319         if (ret >= 0)
320                 intel_irq_remap_add_device(info);
321
322         return ret;
323 }
324
325 static void dmar_pci_bus_del_dev(struct dmar_pci_notify_info *info)
326 {
327         struct dmar_drhd_unit *dmaru;
328
329         for_each_drhd_unit(dmaru)
330                 if (dmar_remove_dev_scope(info, dmaru->segment,
331                         dmaru->devices, dmaru->devices_cnt))
332                         break;
333         dmar_iommu_notify_scope_dev(info);
334 }
335
336 static inline void vf_inherit_msi_domain(struct pci_dev *pdev)
337 {
338         dev_set_msi_domain(&pdev->dev, dev_get_msi_domain(&pdev->physfn->dev));
339 }
340
341 static int dmar_pci_bus_notifier(struct notifier_block *nb,
342                                  unsigned long action, void *data)
343 {
344         struct pci_dev *pdev = to_pci_dev(data);
345         struct dmar_pci_notify_info *info;
346
347         /* Only care about add/remove events for physical functions.
348          * For VFs we actually do the lookup based on the corresponding
349          * PF in device_to_iommu() anyway. */
350         if (pdev->is_virtfn) {
351                 /*
352                  * Ensure that the VF device inherits the irq domain of the
353                  * PF device. Ideally the device would inherit the domain
354                  * from the bus, but DMAR can have multiple units per bus
355                  * which makes this impossible. The VF 'bus' could inherit
356                  * from the PF device, but that's yet another x86'sism to
357                  * inflict on everybody else.
358                  */
359                 if (action == BUS_NOTIFY_ADD_DEVICE)
360                         vf_inherit_msi_domain(pdev);
361                 return NOTIFY_DONE;
362         }
363
364         if (action != BUS_NOTIFY_ADD_DEVICE &&
365             action != BUS_NOTIFY_REMOVED_DEVICE)
366                 return NOTIFY_DONE;
367
368         info = dmar_alloc_pci_notify_info(pdev, action);
369         if (!info)
370                 return NOTIFY_DONE;
371
372         down_write(&dmar_global_lock);
373         if (action == BUS_NOTIFY_ADD_DEVICE)
374                 dmar_pci_bus_add_dev(info);
375         else if (action == BUS_NOTIFY_REMOVED_DEVICE)
376                 dmar_pci_bus_del_dev(info);
377         up_write(&dmar_global_lock);
378
379         dmar_free_pci_notify_info(info);
380
381         return NOTIFY_OK;
382 }
383
384 static struct notifier_block dmar_pci_bus_nb = {
385         .notifier_call = dmar_pci_bus_notifier,
386         .priority = INT_MIN,
387 };
388
389 static struct dmar_drhd_unit *
390 dmar_find_dmaru(struct acpi_dmar_hardware_unit *drhd)
391 {
392         struct dmar_drhd_unit *dmaru;
393
394         list_for_each_entry_rcu(dmaru, &dmar_drhd_units, list,
395                                 dmar_rcu_check())
396                 if (dmaru->segment == drhd->segment &&
397                     dmaru->reg_base_addr == drhd->address)
398                         return dmaru;
399
400         return NULL;
401 }
402
403 /*
404  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
405  * structure, which uniquely represents one DMA remapping hardware unit
406  * present in the platform
407  */
408 static int dmar_parse_one_drhd(struct acpi_dmar_header *header, void *arg)
409 {
410         struct acpi_dmar_hardware_unit *drhd;
411         struct dmar_drhd_unit *dmaru;
412         int ret;
413
414         drhd = (struct acpi_dmar_hardware_unit *)header;
415         dmaru = dmar_find_dmaru(drhd);
416         if (dmaru)
417                 goto out;
418
419         dmaru = kzalloc(sizeof(*dmaru) + header->length, GFP_KERNEL);
420         if (!dmaru)
421                 return -ENOMEM;
422
423         /*
424          * If header is allocated from slab by ACPI _DSM method, we need to
425          * copy the content because the memory buffer will be freed on return.
426          */
427         dmaru->hdr = (void *)(dmaru + 1);
428         memcpy(dmaru->hdr, header, header->length);
429         dmaru->reg_base_addr = drhd->address;
430         dmaru->segment = drhd->segment;
431         dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
432         dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1),
433                                               ((void *)drhd) + drhd->header.length,
434                                               &dmaru->devices_cnt);
435         if (dmaru->devices_cnt && dmaru->devices == NULL) {
436                 kfree(dmaru);
437                 return -ENOMEM;
438         }
439
440         ret = alloc_iommu(dmaru);
441         if (ret) {
442                 dmar_free_dev_scope(&dmaru->devices,
443                                     &dmaru->devices_cnt);
444                 kfree(dmaru);
445                 return ret;
446         }
447         dmar_register_drhd_unit(dmaru);
448
449 out:
450         if (arg)
451                 (*(int *)arg)++;
452
453         return 0;
454 }
455
456 static void dmar_free_drhd(struct dmar_drhd_unit *dmaru)
457 {
458         if (dmaru->devices && dmaru->devices_cnt)
459                 dmar_free_dev_scope(&dmaru->devices, &dmaru->devices_cnt);
460         if (dmaru->iommu)
461                 free_iommu(dmaru->iommu);
462         kfree(dmaru);
463 }
464
465 static int __init dmar_parse_one_andd(struct acpi_dmar_header *header,
466                                       void *arg)
467 {
468         struct acpi_dmar_andd *andd = (void *)header;
469
470         /* Check for NUL termination within the designated length */
471         if (strnlen(andd->device_name, header->length - 8) == header->length - 8) {
472                 pr_warn(FW_BUG
473                            "Your BIOS is broken; ANDD object name is not NUL-terminated\n"
474                            "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
475                            dmi_get_system_info(DMI_BIOS_VENDOR),
476                            dmi_get_system_info(DMI_BIOS_VERSION),
477                            dmi_get_system_info(DMI_PRODUCT_VERSION));
478                 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
479                 return -EINVAL;
480         }
481         pr_info("ANDD device: %x name: %s\n", andd->device_number,
482                 andd->device_name);
483
484         return 0;
485 }
486
487 #ifdef CONFIG_ACPI_NUMA
488 static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg)
489 {
490         struct acpi_dmar_rhsa *rhsa;
491         struct dmar_drhd_unit *drhd;
492
493         rhsa = (struct acpi_dmar_rhsa *)header;
494         for_each_drhd_unit(drhd) {
495                 if (drhd->reg_base_addr == rhsa->base_address) {
496                         int node = pxm_to_node(rhsa->proximity_domain);
497
498                         if (!node_online(node))
499                                 node = NUMA_NO_NODE;
500                         drhd->iommu->node = node;
501                         return 0;
502                 }
503         }
504         pr_warn(FW_BUG
505                 "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
506                 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
507                 rhsa->base_address,
508                 dmi_get_system_info(DMI_BIOS_VENDOR),
509                 dmi_get_system_info(DMI_BIOS_VERSION),
510                 dmi_get_system_info(DMI_PRODUCT_VERSION));
511         add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
512
513         return 0;
514 }
515 #else
516 #define dmar_parse_one_rhsa             dmar_res_noop
517 #endif
518
519 static void
520 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
521 {
522         struct acpi_dmar_hardware_unit *drhd;
523         struct acpi_dmar_reserved_memory *rmrr;
524         struct acpi_dmar_atsr *atsr;
525         struct acpi_dmar_rhsa *rhsa;
526
527         switch (header->type) {
528         case ACPI_DMAR_TYPE_HARDWARE_UNIT:
529                 drhd = container_of(header, struct acpi_dmar_hardware_unit,
530                                     header);
531                 pr_info("DRHD base: %#016Lx flags: %#x\n",
532                         (unsigned long long)drhd->address, drhd->flags);
533                 break;
534         case ACPI_DMAR_TYPE_RESERVED_MEMORY:
535                 rmrr = container_of(header, struct acpi_dmar_reserved_memory,
536                                     header);
537                 pr_info("RMRR base: %#016Lx end: %#016Lx\n",
538                         (unsigned long long)rmrr->base_address,
539                         (unsigned long long)rmrr->end_address);
540                 break;
541         case ACPI_DMAR_TYPE_ROOT_ATS:
542                 atsr = container_of(header, struct acpi_dmar_atsr, header);
543                 pr_info("ATSR flags: %#x\n", atsr->flags);
544                 break;
545         case ACPI_DMAR_TYPE_HARDWARE_AFFINITY:
546                 rhsa = container_of(header, struct acpi_dmar_rhsa, header);
547                 pr_info("RHSA base: %#016Lx proximity domain: %#x\n",
548                        (unsigned long long)rhsa->base_address,
549                        rhsa->proximity_domain);
550                 break;
551         case ACPI_DMAR_TYPE_NAMESPACE:
552                 /* We don't print this here because we need to sanity-check
553                    it first. So print it in dmar_parse_one_andd() instead. */
554                 break;
555         }
556 }
557
558 /**
559  * dmar_table_detect - checks to see if the platform supports DMAR devices
560  */
561 static int __init dmar_table_detect(void)
562 {
563         acpi_status status = AE_OK;
564
565         /* if we can find the DMAR table, then there are DMAR devices */
566         status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl);
567
568         if (ACPI_SUCCESS(status) && !dmar_tbl) {
569                 pr_warn("Unable to map DMAR\n");
570                 status = AE_NOT_FOUND;
571         }
572
573         return ACPI_SUCCESS(status) ? 0 : -ENOENT;
574 }
575
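/*
 * Walk the remapping structures in [@start, @start + @len) and dispatch each
 * one to the matching handler in @cb.  Unknown structure types are skipped
 * for forward compatibility; a known type without a handler is an error
 * unless cb->ignore_unhandled is set.  A zero-length record stops the walk,
 * and a record running past the table end is an error.
 */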
576 static int dmar_walk_remapping_entries(struct acpi_dmar_header *start,
577                                        size_t len, struct dmar_res_callback *cb)
578 {
579         struct acpi_dmar_header *iter, *next;
580         struct acpi_dmar_header *end = ((void *)start) + len;
581
582         for (iter = start; iter < end; iter = next) {
583                 next = (void *)iter + iter->length;
584                 if (iter->length == 0) {
585                         /* Avoid looping forever on bad ACPI tables */
586                         pr_debug(FW_BUG "Invalid 0-length structure\n");
587                         break;
588                 } else if (next > end) {
589                         /* Avoid passing table end */
590                         pr_warn(FW_BUG "Record passes table end\n");
591                         return -EINVAL;
592                 }
593
594                 if (cb->print_entry)
595                         dmar_table_print_dmar_entry(iter);
596
597                 if (iter->type >= ACPI_DMAR_TYPE_RESERVED) {
598                         /* continue for forward compatibility */
599                         pr_debug("Unknown DMAR structure type %d\n",
600                                  iter->type);
601                 } else if (cb->cb[iter->type]) {
602                         int ret;
603
604                         ret = cb->cb[iter->type](iter, cb->arg[iter->type]);
605                         if (ret)
606                                 return ret;
607                 } else if (!cb->ignore_unhandled) {
608                         pr_warn("No handler for DMAR structure type %d\n",
609                                 iter->type);
610                         return -EINVAL;
611                 }
612         }
613
614         return 0;
615 }
616
617 static inline int dmar_walk_dmar_table(struct acpi_table_dmar *dmar,
618                                        struct dmar_res_callback *cb)
619 {
620         return dmar_walk_remapping_entries((void *)(dmar + 1),
621                         dmar->header.length - sizeof(*dmar), cb);
622 }
623
624 /**
625  * parse_dmar_table - parses the DMA reporting table
626  */
627 static int __init
628 parse_dmar_table(void)
629 {
630         struct acpi_table_dmar *dmar;
631         int drhd_count = 0;
632         int ret;
633         struct dmar_res_callback cb = {
634                 .print_entry = true,
635                 .ignore_unhandled = true,
636                 .arg[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &drhd_count,
637                 .cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_parse_one_drhd,
638                 .cb[ACPI_DMAR_TYPE_RESERVED_MEMORY] = &dmar_parse_one_rmrr,
639                 .cb[ACPI_DMAR_TYPE_ROOT_ATS] = &dmar_parse_one_atsr,
640                 .cb[ACPI_DMAR_TYPE_HARDWARE_AFFINITY] = &dmar_parse_one_rhsa,
641                 .cb[ACPI_DMAR_TYPE_NAMESPACE] = &dmar_parse_one_andd,
642         };
643
644         /*
645          * Detect the table again; the earlier dmar_tbl mapping may have
646          * been set up with a fixed (early) mapping.
647          */
648         dmar_table_detect();
649
650         /*
651          * ACPI tables may not be DMA protected by tboot, so use the DMAR
652          * copy that SINIT saved in SinitMleData in the TXT heap (which is
653          * DMA protected)
653          */
654         dmar_tbl = tboot_get_dmar_table(dmar_tbl);
655
656         dmar = (struct acpi_table_dmar *)dmar_tbl;
657         if (!dmar)
658                 return -ENODEV;
659
660         if (dmar->width < PAGE_SHIFT - 1) {
661                 pr_warn("Invalid DMAR haw\n");
662                 return -EINVAL;
663         }
664
665         pr_info("Host address width %d\n", dmar->width + 1);
666         ret = dmar_walk_dmar_table(dmar, &cb);
667         if (ret == 0 && drhd_count == 0)
668                 pr_warn(FW_BUG "No DRHD structure found in DMAR table\n");
669
670         return ret;
671 }
672
673 static int dmar_pci_device_match(struct dmar_dev_scope devices[],
674                                  int cnt, struct pci_dev *dev)
675 {
676         int index;
677         struct device *tmp;
678
679         while (dev) {
680                 for_each_active_dev_scope(devices, cnt, index, tmp)
681                         if (dev_is_pci(tmp) && dev == to_pci_dev(tmp))
682                                 return 1;
683
684                 /* Check our parent */
685                 dev = dev->bus->self;
686         }
687
688         return 0;
689 }
690
691 struct dmar_drhd_unit *
692 dmar_find_matched_drhd_unit(struct pci_dev *dev)
693 {
694         struct dmar_drhd_unit *dmaru;
695         struct acpi_dmar_hardware_unit *drhd;
696
697         dev = pci_physfn(dev);
698
699         rcu_read_lock();
700         for_each_drhd_unit(dmaru) {
701                 drhd = container_of(dmaru->hdr,
702                                     struct acpi_dmar_hardware_unit,
703                                     header);
704
705                 if (dmaru->include_all &&
706                     drhd->segment == pci_domain_nr(dev->bus))
707                         goto out;
708
709                 if (dmar_pci_device_match(dmaru->devices,
710                                           dmaru->devices_cnt, dev))
711                         goto out;
712         }
713         dmaru = NULL;
714 out:
715         rcu_read_unlock();
716
717         return dmaru;
718 }
719
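/*
 * Record the ACPI device @adev in the device scope of the DRHD unit whose
 * NAMESPACE scope entry carries the matching ANDD enumeration ID.  The
 * bus/devfn from the scope's PCI path is stored in the first free slot.
 */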
720 static void __init dmar_acpi_insert_dev_scope(u8 device_number,
721                                               struct acpi_device *adev)
722 {
723         struct dmar_drhd_unit *dmaru;
724         struct acpi_dmar_hardware_unit *drhd;
725         struct acpi_dmar_device_scope *scope;
726         struct device *tmp;
727         int i;
728         struct acpi_dmar_pci_path *path;
729
730         for_each_drhd_unit(dmaru) {
731                 drhd = container_of(dmaru->hdr,
732                                     struct acpi_dmar_hardware_unit,
733                                     header);
734
735                 for (scope = (void *)(drhd + 1);
736                      (unsigned long)scope < ((unsigned long)drhd) + drhd->header.length;
737                      scope = ((void *)scope) + scope->length) {
738                         if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_NAMESPACE)
739                                 continue;
740                         if (scope->enumeration_id != device_number)
741                                 continue;
742
743                         path = (void *)(scope + 1);
744                         pr_info("ACPI device \"%s\" under DMAR at %llx as %02x:%02x.%d\n",
745                                 dev_name(&adev->dev), dmaru->reg_base_addr,
746                                 scope->bus, path->device, path->function);
747                         for_each_dev_scope(dmaru->devices, dmaru->devices_cnt, i, tmp)
748                                 if (tmp == NULL) {
749                                         dmaru->devices[i].bus = scope->bus;
750                                         dmaru->devices[i].devfn = PCI_DEVFN(path->device,
751                                                                             path->function);
752                                         rcu_assign_pointer(dmaru->devices[i].dev,
753                                                            get_device(&adev->dev));
754                                         return;
755                                 }
756                         BUG_ON(i >= dmaru->devices_cnt);
757                 }
758         }
759         pr_warn("No IOMMU scope found for ANDD enumeration ID %d (%s)\n",
760                 device_number, dev_name(&adev->dev));
761 }
762
763 static int __init dmar_acpi_dev_scope_init(void)
764 {
765         struct acpi_dmar_andd *andd;
766
767         if (dmar_tbl == NULL)
768                 return -ENODEV;
769
770         for (andd = (void *)dmar_tbl + sizeof(struct acpi_table_dmar);
771              ((unsigned long)andd) < ((unsigned long)dmar_tbl) + dmar_tbl->length;
772              andd = ((void *)andd) + andd->header.length) {
773                 if (andd->header.type == ACPI_DMAR_TYPE_NAMESPACE) {
774                         acpi_handle h;
775                         struct acpi_device *adev;
776
777                         if (!ACPI_SUCCESS(acpi_get_handle(ACPI_ROOT_OBJECT,
778                                                           andd->device_name,
779                                                           &h))) {
780                                 pr_err("Failed to find handle for ACPI object %s\n",
781                                        andd->device_name);
782                                 continue;
783                         }
784                         if (acpi_bus_get_device(h, &adev)) {
785                                 pr_err("Failed to get device for ACPI object %s\n",
786                                        andd->device_name);
787                                 continue;
788                         }
789                         dmar_acpi_insert_dev_scope(andd->device_number, adev);
790                 }
791         }
792         return 0;
793 }
794
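/*
 * One-shot initialization of the per-DRHD device scopes: resolve the ACPI
 * namespace (ANDD) devices first, then walk all present PCI devices
 * (skipping virtual functions) and insert each one as if it had just been
 * hot-added.  The result is cached in dmar_dev_scope_status.
 */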
795 int __init dmar_dev_scope_init(void)
796 {
797         struct pci_dev *dev = NULL;
798         struct dmar_pci_notify_info *info;
799
800         if (dmar_dev_scope_status != 1)
801                 return dmar_dev_scope_status;
802
803         if (list_empty(&dmar_drhd_units)) {
804                 dmar_dev_scope_status = -ENODEV;
805         } else {
806                 dmar_dev_scope_status = 0;
807
808                 dmar_acpi_dev_scope_init();
809
810                 for_each_pci_dev(dev) {
811                         if (dev->is_virtfn)
812                                 continue;
813
814                         info = dmar_alloc_pci_notify_info(dev,
815                                         BUS_NOTIFY_ADD_DEVICE);
816                         if (!info) {
817                                 return dmar_dev_scope_status;
818                         } else {
819                                 dmar_pci_bus_add_dev(info);
820                                 dmar_free_pci_notify_info(info);
821                         }
822                 }
823         }
824
825         return dmar_dev_scope_status;
826 }
827
828 void __init dmar_register_bus_notifier(void)
829 {
830         bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb);
831 }
832
833
834 int __init dmar_table_init(void)
835 {
836         static int dmar_table_initialized;
837         int ret;
838
839         if (dmar_table_initialized == 0) {
840                 ret = parse_dmar_table();
841                 if (ret < 0) {
842                         if (ret != -ENODEV)
843                                 pr_info("Parse DMAR table failure.\n");
844                 } else  if (list_empty(&dmar_drhd_units)) {
845                         pr_info("No DMAR devices found\n");
846                         ret = -ENODEV;
847                 }
848
849                 if (ret < 0)
850                         dmar_table_initialized = ret;
851                 else
852                         dmar_table_initialized = 1;
853         }
854
855         return dmar_table_initialized < 0 ? dmar_table_initialized : 0;
856 }
857
858 static void warn_invalid_dmar(u64 addr, const char *message)
859 {
860         pr_warn_once(FW_BUG
861                 "Your BIOS is broken; DMAR reported at address %llx%s!\n"
862                 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
863                 addr, message,
864                 dmi_get_system_info(DMI_BIOS_VENDOR),
865                 dmi_get_system_info(DMI_BIOS_VERSION),
866                 dmi_get_system_info(DMI_PRODUCT_VERSION));
867         add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
868 }
869
870 static int __ref
871 dmar_validate_one_drhd(struct acpi_dmar_header *entry, void *arg)
872 {
873         struct acpi_dmar_hardware_unit *drhd;
874         void __iomem *addr;
875         u64 cap, ecap;
876
877         drhd = (void *)entry;
878         if (!drhd->address) {
879                 warn_invalid_dmar(0, "");
880                 return -EINVAL;
881         }
882
883         if (arg)
884                 addr = ioremap(drhd->address, VTD_PAGE_SIZE);
885         else
886                 addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
887         if (!addr) {
888                 pr_warn("Can't validate DRHD address: %llx\n", drhd->address);
889                 return -EINVAL;
890         }
891
892         cap = dmar_readq(addr + DMAR_CAP_REG);
893         ecap = dmar_readq(addr + DMAR_ECAP_REG);
894
895         if (arg)
896                 iounmap(addr);
897         else
898                 early_iounmap(addr, VTD_PAGE_SIZE);
899
900         if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
901                 warn_invalid_dmar(drhd->address, " returns all ones");
902                 return -EINVAL;
903         }
904
905         return 0;
906 }
907
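/*
 * Early detection of DMAR/VT-d: map the DMAR table, sanity-check the
 * register range of every DRHD entry, and set iommu_detected plus the x86
 * init/shutdown hooks when remapping hardware is present and not disabled.
 * The table reference is dropped again here and re-acquired later by
 * dmar_table_init().
 */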
908 int __init detect_intel_iommu(void)
909 {
910         int ret;
911         struct dmar_res_callback validate_drhd_cb = {
912                 .cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_validate_one_drhd,
913                 .ignore_unhandled = true,
914         };
915
916         down_write(&dmar_global_lock);
917         ret = dmar_table_detect();
918         if (!ret)
919                 ret = dmar_walk_dmar_table((struct acpi_table_dmar *)dmar_tbl,
920                                            &validate_drhd_cb);
921         if (!ret && !no_iommu && !iommu_detected &&
922             (!dmar_disabled || dmar_platform_optin())) {
923                 iommu_detected = 1;
924                 /* Make sure ACS will be enabled */
925                 pci_request_acs();
926         }
927
928 #ifdef CONFIG_X86
929         if (!ret) {
930                 x86_init.iommu.iommu_init = intel_iommu_init;
931                 x86_platform.iommu_shutdown = intel_iommu_shutdown;
932         }
933
934 #endif
935
936         if (dmar_tbl) {
937                 acpi_put_table(dmar_tbl);
938                 dmar_tbl = NULL;
939         }
940         up_write(&dmar_global_lock);
941
942         return ret ? ret : 1;
943 }
944
945 static void unmap_iommu(struct intel_iommu *iommu)
946 {
947         iounmap(iommu->reg);
948         release_mem_region(iommu->reg_phys, iommu->reg_size);
949 }
950
951 /**
952  * map_iommu: map the iommu's registers
953  * @iommu: the iommu to map
954  * @phys_addr: the physical address of the base register
955  *
956  * Memory map the iommu's registers.  Start with a single page, and
957  * possibly expand if that turns out to be insufficient.
958  */
959 static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
960 {
961         int map_size, err = 0;
962
963         iommu->reg_phys = phys_addr;
964         iommu->reg_size = VTD_PAGE_SIZE;
965
966         if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) {
967                 pr_err("Can't reserve memory\n");
968                 err = -EBUSY;
969                 goto out;
970         }
971
972         iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
973         if (!iommu->reg) {
974                 pr_err("Can't map the region\n");
975                 err = -ENOMEM;
976                 goto release;
977         }
978
979         iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
980         iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
981
982         if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
983                 err = -EINVAL;
984                 warn_invalid_dmar(phys_addr, " returns all ones");
985                 goto unmap;
986         }
987         iommu->vccap = dmar_readq(iommu->reg + DMAR_VCCAP_REG);
988
989         /* the registers might be more than one page */
990         map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
991                          cap_max_fault_reg_offset(iommu->cap));
992         map_size = VTD_PAGE_ALIGN(map_size);
993         if (map_size > iommu->reg_size) {
994                 iounmap(iommu->reg);
995                 release_mem_region(iommu->reg_phys, iommu->reg_size);
996                 iommu->reg_size = map_size;
997                 if (!request_mem_region(iommu->reg_phys, iommu->reg_size,
998                                         iommu->name)) {
999                         pr_err("Can't reserve memory\n");
1000                         err = -EBUSY;
1001                         goto out;
1002                 }
1003                 iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
1004                 if (!iommu->reg) {
1005                         pr_err("Can't map the region\n");
1006                         err = -ENOMEM;
1007                         goto release;
1008                 }
1009         }
1010         err = 0;
1011         goto out;
1012
1013 unmap:
1014         iounmap(iommu->reg);
1015 release:
1016         release_mem_region(iommu->reg_phys, iommu->reg_size);
1017 out:
1018         return err;
1019 }
1020
1021 static int dmar_alloc_seq_id(struct intel_iommu *iommu)
1022 {
1023         iommu->seq_id = find_first_zero_bit(dmar_seq_ids,
1024                                             DMAR_UNITS_SUPPORTED);
1025         if (iommu->seq_id >= DMAR_UNITS_SUPPORTED) {
1026                 iommu->seq_id = -1;
1027         } else {
1028                 set_bit(iommu->seq_id, dmar_seq_ids);
1029                 sprintf(iommu->name, "dmar%d", iommu->seq_id);
1030         }
1031
1032         return iommu->seq_id;
1033 }
1034
1035 static void dmar_free_seq_id(struct intel_iommu *iommu)
1036 {
1037         if (iommu->seq_id >= 0) {
1038                 clear_bit(iommu->seq_id, dmar_seq_ids);
1039                 iommu->seq_id = -1;
1040         }
1041 }
1042
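/*
 * Allocate and initialize an intel_iommu for @drhd: assign a sequence id,
 * map the register set, read cap/ecap/vccap, compute the (maximum) supported
 * AGAW, mirror the current hardware status into gcmd, and register the unit
 * with the IOMMU core when it is hot-added after intel_iommu_init().
 */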
1043 static int alloc_iommu(struct dmar_drhd_unit *drhd)
1044 {
1045         struct intel_iommu *iommu;
1046         u32 ver, sts;
1047         int agaw = -1;
1048         int msagaw = -1;
1049         int err;
1050
1051         if (!drhd->reg_base_addr) {
1052                 warn_invalid_dmar(0, "");
1053                 return -EINVAL;
1054         }
1055
1056         iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
1057         if (!iommu)
1058                 return -ENOMEM;
1059
1060         if (dmar_alloc_seq_id(iommu) < 0) {
1061                 pr_err("Failed to allocate seq_id\n");
1062                 err = -ENOSPC;
1063                 goto error;
1064         }
1065
1066         err = map_iommu(iommu, drhd->reg_base_addr);
1067         if (err) {
1068                 pr_err("Failed to map %s\n", iommu->name);
1069                 goto error_free_seq_id;
1070         }
1071
1072         err = -EINVAL;
1073         if (cap_sagaw(iommu->cap) == 0) {
1074                 pr_info("%s: No supported address widths. Not attempting DMA translation.\n",
1075                         iommu->name);
1076                 drhd->ignored = 1;
1077         }
1078
1079         if (!drhd->ignored) {
1080                 agaw = iommu_calculate_agaw(iommu);
1081                 if (agaw < 0) {
1082                         pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
1083                                iommu->seq_id);
1084                         drhd->ignored = 1;
1085                 }
1086         }
1087         if (!drhd->ignored) {
1088                 msagaw = iommu_calculate_max_sagaw(iommu);
1089                 if (msagaw < 0) {
1090                         pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n",
1091                                iommu->seq_id);
1092                         drhd->ignored = 1;
1093                         agaw = -1;
1094                 }
1095         }
1096         iommu->agaw = agaw;
1097         iommu->msagaw = msagaw;
1098         iommu->segment = drhd->segment;
1099
1100         iommu->node = NUMA_NO_NODE;
1101
1102         ver = readl(iommu->reg + DMAR_VER_REG);
1103         pr_info("%s: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
1104                 iommu->name,
1105                 (unsigned long long)drhd->reg_base_addr,
1106                 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
1107                 (unsigned long long)iommu->cap,
1108                 (unsigned long long)iommu->ecap);
1109
1110         /* Reflect status in gcmd */
1111         sts = readl(iommu->reg + DMAR_GSTS_REG);
1112         if (sts & DMA_GSTS_IRES)
1113                 iommu->gcmd |= DMA_GCMD_IRE;
1114         if (sts & DMA_GSTS_TES)
1115                 iommu->gcmd |= DMA_GCMD_TE;
1116         if (sts & DMA_GSTS_QIES)
1117                 iommu->gcmd |= DMA_GCMD_QIE;
1118
1119         raw_spin_lock_init(&iommu->register_lock);
1120
1121         /*
1122          * This is only for hotplug; at boot time intel_iommu_enabled won't
1123          * be set yet. When intel_iommu_init() runs, it registers the units
1124          * present at boot time, then sets intel_iommu_enabled.
1125          */
1126         if (intel_iommu_enabled && !drhd->ignored) {
1127                 err = iommu_device_sysfs_add(&iommu->iommu, NULL,
1128                                              intel_iommu_groups,
1129                                              "%s", iommu->name);
1130                 if (err)
1131                         goto err_unmap;
1132
1133                 iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
1134
1135                 err = iommu_device_register(&iommu->iommu);
1136                 if (err)
1137                         goto err_unmap;
1138         }
1139
1140         drhd->iommu = iommu;
1141         iommu->drhd = drhd;
1142
1143         return 0;
1144
1145 err_unmap:
1146         unmap_iommu(iommu);
1147 error_free_seq_id:
1148         dmar_free_seq_id(iommu);
1149 error:
1150         kfree(iommu);
1151         return err;
1152 }
1153
1154 static void free_iommu(struct intel_iommu *iommu)
1155 {
1156         if (intel_iommu_enabled && !iommu->drhd->ignored) {
1157                 iommu_device_unregister(&iommu->iommu);
1158                 iommu_device_sysfs_remove(&iommu->iommu);
1159         }
1160
1161         if (iommu->irq) {
1162                 if (iommu->pr_irq) {
1163                         free_irq(iommu->pr_irq, iommu);
1164                         dmar_free_hwirq(iommu->pr_irq);
1165                         iommu->pr_irq = 0;
1166                 }
1167                 free_irq(iommu->irq, iommu);
1168                 dmar_free_hwirq(iommu->irq);
1169                 iommu->irq = 0;
1170         }
1171
1172         if (iommu->qi) {
1173                 free_page((unsigned long)iommu->qi->desc);
1174                 kfree(iommu->qi->desc_status);
1175                 kfree(iommu->qi);
1176         }
1177
1178         if (iommu->reg)
1179                 unmap_iommu(iommu);
1180
1181         dmar_free_seq_id(iommu);
1182         kfree(iommu);
1183 }
1184
1185 /*
1186  * Reclaim all the submitted descriptors that have completed their work.
1187  */
1188 static inline void reclaim_free_desc(struct q_inval *qi)
1189 {
1190         while (qi->desc_status[qi->free_tail] == QI_DONE ||
1191                qi->desc_status[qi->free_tail] == QI_ABORT) {
1192                 qi->desc_status[qi->free_tail] = QI_FREE;
1193                 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
1194                 qi->free_cnt++;
1195         }
1196 }
1197
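/*
 * Check the fault status register while waiting on the wait descriptor at
 * @wait_index.  An Invalidation Queue Error (IQE) on the descriptor at
 * @index is patched over with the wait descriptor and reported as -EINVAL;
 * an Invalidation Time-out Error (ITE) marks the pending wait descriptors
 * as aborted and returns -EAGAIN when ours was among them, so the caller
 * can resubmit; an Invalidation Completion Error (ICE) is simply cleared.
 */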
1198 static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
1199 {
1200         u32 fault;
1201         int head, tail;
1202         struct q_inval *qi = iommu->qi;
1203         int shift = qi_shift(iommu);
1204
1205         if (qi->desc_status[wait_index] == QI_ABORT)
1206                 return -EAGAIN;
1207
1208         fault = readl(iommu->reg + DMAR_FSTS_REG);
1209
1210         /*
1211          * If IQE happens, the head points to the descriptor associated
1212          * with the error. No new descriptors are fetched until the IQE
1213          * is cleared.
1214          */
1215         if (fault & DMA_FSTS_IQE) {
1216                 head = readl(iommu->reg + DMAR_IQH_REG);
1217                 if ((head >> shift) == index) {
1218                         struct qi_desc *desc = qi->desc + head;
1219
1220                         /*
1221                          * desc->qw2 and desc->qw3 are either reserved or
1222                          * used by software as private data. We won't print
1223                          * out these two qw's for security consideration.
1224                          */
1225                         pr_err("VT-d detected invalid descriptor: qw0 = %llx, qw1 = %llx\n",
1226                                (unsigned long long)desc->qw0,
1227                                (unsigned long long)desc->qw1);
1228                         memcpy(desc, qi->desc + (wait_index << shift),
1229                                1 << shift);
1230                         writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
1231                         return -EINVAL;
1232                 }
1233         }
1234
1235         /*
1236          * If ITE happens, all pending wait_desc commands are aborted.
1237          * No new descriptors are fetched until the ITE is cleared.
1238          */
1239         if (fault & DMA_FSTS_ITE) {
1240                 head = readl(iommu->reg + DMAR_IQH_REG);
1241                 head = ((head >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1242                 head |= 1;
1243                 tail = readl(iommu->reg + DMAR_IQT_REG);
1244                 tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1245
1246                 writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
1247
1248                 do {
1249                         if (qi->desc_status[head] == QI_IN_USE)
1250                                 qi->desc_status[head] = QI_ABORT;
1251                         head = (head - 2 + QI_LENGTH) % QI_LENGTH;
1252                 } while (head != tail);
1253
1254                 if (qi->desc_status[wait_index] == QI_ABORT)
1255                         return -EAGAIN;
1256         }
1257
1258         if (fault & DMA_FSTS_ICE)
1259                 writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
1260
1261         return 0;
1262 }
1263
1264 /*
1265  * Submit invalidation descriptors of all types to the queued invalidation
1266  * interface (QI).  Multiple descriptors can be submitted at a time; a wait
1267  * descriptor is appended to each submission to ensure that the hardware has
1268  * completed the invalidation before returning.  Wait descriptors can be part
1269  * of the submission, but they will not be polled for completion.
1270  */
1271 int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
1272                    unsigned int count, unsigned long options)
1273 {
1274         struct q_inval *qi = iommu->qi;
1275         struct qi_desc wait_desc;
1276         int wait_index, index;
1277         unsigned long flags;
1278         int offset, shift;
1279         int rc, i;
1280
1281         if (!qi)
1282                 return 0;
1283
1284 restart:
1285         rc = 0;
1286
1287         raw_spin_lock_irqsave(&qi->q_lock, flags);
1288         /*
1289          * Check if we have enough empty slots in the queue to submit,
1290          * the calculation is based on:
1291          * # of desc + 1 wait desc + 1 space between head and tail
1292          */
1293         while (qi->free_cnt < count + 2) {
1294                 raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1295                 cpu_relax();
1296                 raw_spin_lock_irqsave(&qi->q_lock, flags);
1297         }
1298
1299         index = qi->free_head;
1300         wait_index = (index + count) % QI_LENGTH;
1301         shift = qi_shift(iommu);
1302
1303         for (i = 0; i < count; i++) {
1304                 offset = ((index + i) % QI_LENGTH) << shift;
1305                 memcpy(qi->desc + offset, &desc[i], 1 << shift);
1306                 qi->desc_status[(index + i) % QI_LENGTH] = QI_IN_USE;
1307         }
1308         qi->desc_status[wait_index] = QI_IN_USE;
1309
1310         wait_desc.qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
1311                         QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
1312         if (options & QI_OPT_WAIT_DRAIN)
1313                 wait_desc.qw0 |= QI_IWD_PRQ_DRAIN;
1314         wait_desc.qw1 = virt_to_phys(&qi->desc_status[wait_index]);
1315         wait_desc.qw2 = 0;
1316         wait_desc.qw3 = 0;
1317
1318         offset = wait_index << shift;
1319         memcpy(qi->desc + offset, &wait_desc, 1 << shift);
1320
1321         qi->free_head = (qi->free_head + count + 1) % QI_LENGTH;
1322         qi->free_cnt -= count + 1;
1323
1324         /*
1325          * update the HW tail register indicating the presence of
1326          * new descriptors.
1327          */
1328         writel(qi->free_head << shift, iommu->reg + DMAR_IQT_REG);
1329
1330         while (qi->desc_status[wait_index] != QI_DONE) {
1331                 /*
1332                  * We leave interrupts disabled to prevent interrupt context
1333                  * from queueing another cmd while one is already submitted and
1334                  * waiting for completion on this CPU.  This avoids a deadlock
1335                  * where interrupt context could wait indefinitely for free
1336                  * slots in the queue.
1337                  */
1338                 rc = qi_check_fault(iommu, index, wait_index);
1339                 if (rc)
1340                         break;
1341
1342                 raw_spin_unlock(&qi->q_lock);
1343                 cpu_relax();
1344                 raw_spin_lock(&qi->q_lock);
1345         }
1346
1347         for (i = 0; i < count; i++)
1348                 qi->desc_status[(index + i) % QI_LENGTH] = QI_DONE;
1349
1350         reclaim_free_desc(qi);
1351         raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1352
1353         if (rc == -EAGAIN)
1354                 goto restart;
1355
1356         return rc;
1357 }
1358
1359 /*
1360  * Flush the global interrupt entry cache.
1361  */
1362 void qi_global_iec(struct intel_iommu *iommu)
1363 {
1364         struct qi_desc desc;
1365
1366         desc.qw0 = QI_IEC_TYPE;
1367         desc.qw1 = 0;
1368         desc.qw2 = 0;
1369         desc.qw3 = 0;
1370
1371         /* should never fail */
1372         qi_submit_sync(iommu, &desc, 1, 0);
1373 }
1374
1375 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
1376                       u64 type)
1377 {
1378         struct qi_desc desc;
1379
1380         desc.qw0 = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
1381                         | QI_CC_GRAN(type) | QI_CC_TYPE;
1382         desc.qw1 = 0;
1383         desc.qw2 = 0;
1384         desc.qw3 = 0;
1385
1386         qi_submit_sync(iommu, &desc, 1, 0);
1387 }
1388
1389 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
1390                     unsigned int size_order, u64 type)
1391 {
1392         u8 dw = 0, dr = 0;
1393
1394         struct qi_desc desc;
1395         int ih = 0;
1396
1397         if (cap_write_drain(iommu->cap))
1398                 dw = 1;
1399
1400         if (cap_read_drain(iommu->cap))
1401                 dr = 1;
1402
1403         desc.qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
1404                 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
1405         desc.qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
1406                 | QI_IOTLB_AM(size_order);
1407         desc.qw2 = 0;
1408         desc.qw3 = 0;
1409
1410         qi_submit_sync(iommu, &desc, 1, 0);
1411 }
1412
1413 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1414                         u16 qdep, u64 addr, unsigned mask)
1415 {
1416         struct qi_desc desc;
1417
1418         if (mask) {
1419                 addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
1420                 desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
1421         } else
1422                 desc.qw1 = QI_DEV_IOTLB_ADDR(addr);
1423
1424         if (qdep >= QI_DEV_IOTLB_MAX_INVS)
1425                 qdep = 0;
1426
1427         desc.qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
1428                    QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid);
1429         desc.qw2 = 0;
1430         desc.qw3 = 0;
1431
1432         qi_submit_sync(iommu, &desc, 1, 0);
1433 }
1434
1435 /* PASID-based IOTLB invalidation */
1436 void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
1437                      unsigned long npages, bool ih)
1438 {
1439         struct qi_desc desc = {.qw2 = 0, .qw3 = 0};
1440
1441         /*
1442          * npages == -1 means a PASID-selective invalidation; otherwise it is
1443          * a positive value for a Page-selective-within-PASID invalidation.
1444          * 0 is not a valid input.
1445          */
1446         if (WARN_ON(!npages)) {
1447                 pr_err("Invalid input npages = %ld\n", npages);
1448                 return;
1449         }
1450
1451         if (npages == -1) {
1452                 desc.qw0 = QI_EIOTLB_PASID(pasid) |
1453                                 QI_EIOTLB_DID(did) |
1454                                 QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
1455                                 QI_EIOTLB_TYPE;
1456                 desc.qw1 = 0;
1457         } else {
1458                 int mask = ilog2(__roundup_pow_of_two(npages));
1459                 unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask));
1460
1461                 if (WARN_ON_ONCE(!IS_ALIGNED(addr, align)))
1462                         addr = ALIGN_DOWN(addr, align);
1463
1464                 desc.qw0 = QI_EIOTLB_PASID(pasid) |
1465                                 QI_EIOTLB_DID(did) |
1466                                 QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
1467                                 QI_EIOTLB_TYPE;
1468                 desc.qw1 = QI_EIOTLB_ADDR(addr) |
1469                                 QI_EIOTLB_IH(ih) |
1470                                 QI_EIOTLB_AM(mask);
1471         }
1472
1473         qi_submit_sync(iommu, &desc, 1, 0);
1474 }
1475
1476 /* PASID-based device IOTLB Invalidate */
1477 void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1478                               u32 pasid,  u16 qdep, u64 addr, unsigned int size_order)
1479 {
1480         unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1);
1481         struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
1482
1483         desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) |
1484                 QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE |
1485                 QI_DEV_IOTLB_PFSID(pfsid);
1486
        /*
         * If the S bit is 0, only a single page is flushed. If the S bit is
         * set, the position of the least significant zero bit in the address
         * encodes the invalidation range (VT-d spec 6.5.2.6), e.g. a zero in
         * bit 12 indicates 8KB and a zero in bit 13 indicates 16KB.
         * size_order == 0 means one 4KB page (PAGE_SIZE).
         * Max Invs Pending (MIP) is set to 0 for now until we have DIT in
         * ECAP.
         */
1496         if (addr & GENMASK_ULL(size_order + VTD_PAGE_SHIFT, 0))
1497                 pr_warn_ratelimited("Invalidate non-aligned address %llx, order %d\n",
1498                                     addr, size_order);
1499
1500         /* Take page address */
1501         desc.qw1 = QI_DEV_EIOTLB_ADDR(addr);
1502
1503         if (size_order) {
                /*
                 * Any 0s in the address below size_order could be mistaken
                 * for the least significant zero bit; set them to 1s so the
                 * encoded range is not smaller than requested.
                 */
1509                 desc.qw1 |= GENMASK_ULL(size_order + VTD_PAGE_SHIFT - 1,
1510                                         VTD_PAGE_SHIFT);
                /* Clear the size_order bit so it is the least significant zero */
1512                 desc.qw1 &= ~mask;
1513                 /* Set the S bit to indicate flushing more than 1 page */
1514                 desc.qw1 |= QI_DEV_EIOTLB_SIZE;
1515         }
1516
1517         qi_submit_sync(iommu, &desc, 1, 0);
1518 }
1519
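/*
 * PASID-cache invalidation. @granu selects the granularity (e.g.
 * domain-selective or PASID-selective) encoded into the descriptor's
 * granularity field.
 */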
1520 void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did,
1521                           u64 granu, u32 pasid)
1522 {
1523         struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
1524
1525         desc.qw0 = QI_PC_PASID(pasid) | QI_PC_DID(did) |
1526                         QI_PC_GRAN(granu) | QI_PC_TYPE;
1527         qi_submit_sync(iommu, &desc, 1, 0);
1528 }
1529
1530 /*
1531  * Disable Queued Invalidation interface.
1532  */
1533 void dmar_disable_qi(struct intel_iommu *iommu)
1534 {
1535         unsigned long flags;
1536         u32 sts;
1537         cycles_t start_time = get_cycles();
1538
1539         if (!ecap_qis(iommu->ecap))
1540                 return;
1541
1542         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1543
        sts = readl(iommu->reg + DMAR_GSTS_REG);
1545         if (!(sts & DMA_GSTS_QIES))
1546                 goto end;
1547
1548         /*
         * Give the hardware a chance to complete any pending invalidation
         * requests.
1550          */
1551         while ((readl(iommu->reg + DMAR_IQT_REG) !=
1552                 readl(iommu->reg + DMAR_IQH_REG)) &&
1553                 (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
1554                 cpu_relax();
1555
1556         iommu->gcmd &= ~DMA_GCMD_QIE;
1557         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1558
1559         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
1560                       !(sts & DMA_GSTS_QIES), sts);
1561 end:
1562         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1563 }
1564
1565 /*
1566  * Enable queued invalidation.
1567  */
1568 static void __dmar_enable_qi(struct intel_iommu *iommu)
1569 {
1570         u32 sts;
1571         unsigned long flags;
1572         struct q_inval *qi = iommu->qi;
1573         u64 val = virt_to_phys(qi->desc);
1574
1575         qi->free_head = qi->free_tail = 0;
1576         qi->free_cnt = QI_LENGTH;
1577
1578         /*
         * Set DW=1 (256-bit descriptors) and QS=1 in IQA_REG when the
         * Scalable Mode capability is present.
1581          */
1582         if (ecap_smts(iommu->ecap))
1583                 val |= (1 << 11) | 1;
1584
1585         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1586
1587         /* write zero to the tail reg */
1588         writel(0, iommu->reg + DMAR_IQT_REG);
1589
1590         dmar_writeq(iommu->reg + DMAR_IQA_REG, val);
1591
1592         iommu->gcmd |= DMA_GCMD_QIE;
1593         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1594
1595         /* Make sure hardware complete it */
1596         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
1597
1598         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1599 }
1600
1601 /*
1602  * Enable Queued Invalidation interface. This is a must to support
1603  * interrupt-remapping. Also used by DMA-remapping, which replaces
1604  * register based IOTLB invalidation.
1605  */
1606 int dmar_enable_qi(struct intel_iommu *iommu)
1607 {
1608         struct q_inval *qi;
1609         struct page *desc_page;
1610
1611         if (!ecap_qis(iommu->ecap))
1612                 return -ENOENT;
1613
1614         /*
         * Queued invalidation is already set up and enabled.
1616          */
1617         if (iommu->qi)
1618                 return 0;
1619
1620         iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1621         if (!iommu->qi)
1622                 return -ENOMEM;
1623
1624         qi = iommu->qi;
1625
1626         /*
1627          * Need two pages to accommodate 256 descriptors of 256 bits each
1628          * if the remapping hardware supports scalable mode translation.
1629          */
1630         desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
1631                                      !!ecap_smts(iommu->ecap));
1632         if (!desc_page) {
1633                 kfree(qi);
1634                 iommu->qi = NULL;
1635                 return -ENOMEM;
1636         }
1637
1638         qi->desc = page_address(desc_page);
1639
1640         qi->desc_status = kcalloc(QI_LENGTH, sizeof(int), GFP_ATOMIC);
1641         if (!qi->desc_status) {
1642                 free_page((unsigned long) qi->desc);
1643                 kfree(qi);
1644                 iommu->qi = NULL;
1645                 return -ENOMEM;
1646         }
1647
1648         raw_spin_lock_init(&qi->q_lock);
1649
1650         __dmar_enable_qi(iommu);
1651
1652         return 0;
1653 }
1654
/* IOMMU interrupt handling. Most of it is MSI-like. */
1656
1657 enum faulttype {
1658         DMA_REMAP,
1659         INTR_REMAP,
1660         UNKNOWN,
1661 };
1662
static const char * const dma_remap_fault_reasons[] = {
1665         "Software",
1666         "Present bit in root entry is clear",
1667         "Present bit in context entry is clear",
1668         "Invalid context entry",
1669         "Access beyond MGAW",
1670         "PTE Write access is not set",
1671         "PTE Read access is not set",
1672         "Next page table ptr is invalid",
1673         "Root table address invalid",
1674         "Context table ptr is invalid",
1675         "non-zero reserved fields in RTP",
1676         "non-zero reserved fields in CTP",
1677         "non-zero reserved fields in PTE",
1678         "PCE for translation request specifies blocking",
1679 };
1680
1681 static const char * const dma_remap_sm_fault_reasons[] = {
1682         "SM: Invalid Root Table Address",
1683         "SM: TTM 0 for request with PASID",
1684         "SM: TTM 0 for page group request",
1685         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x33-0x37 */
1686         "SM: Error attempting to access Root Entry",
1687         "SM: Present bit in Root Entry is clear",
1688         "SM: Non-zero reserved field set in Root Entry",
1689         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x3B-0x3F */
1690         "SM: Error attempting to access Context Entry",
1691         "SM: Present bit in Context Entry is clear",
1692         "SM: Non-zero reserved field set in the Context Entry",
1693         "SM: Invalid Context Entry",
1694         "SM: DTE field in Context Entry is clear",
1695         "SM: PASID Enable field in Context Entry is clear",
1696         "SM: PASID is larger than the max in Context Entry",
1697         "SM: PRE field in Context-Entry is clear",
1698         "SM: RID_PASID field error in Context-Entry",
1699         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x49-0x4F */
1700         "SM: Error attempting to access the PASID Directory Entry",
1701         "SM: Present bit in Directory Entry is clear",
1702         "SM: Non-zero reserved field set in PASID Directory Entry",
1703         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x53-0x57 */
1704         "SM: Error attempting to access PASID Table Entry",
1705         "SM: Present bit in PASID Table Entry is clear",
1706         "SM: Non-zero reserved field set in PASID Table Entry",
1707         "SM: Invalid Scalable-Mode PASID Table Entry",
1708         "SM: ERE field is clear in PASID Table Entry",
1709         "SM: SRE field is clear in PASID Table Entry",
1710         "Unknown", "Unknown",/* 0x5E-0x5F */
1711         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x60-0x67 */
1712         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x68-0x6F */
1713         "SM: Error attempting to access first-level paging entry",
1714         "SM: Present bit in first-level paging entry is clear",
1715         "SM: Non-zero reserved field set in first-level paging entry",
1716         "SM: Error attempting to access FL-PML4 entry",
1717         "SM: First-level entry address beyond MGAW in Nested translation",
1718         "SM: Read permission error in FL-PML4 entry in Nested translation",
1719         "SM: Read permission error in first-level paging entry in Nested translation",
1720         "SM: Write permission error in first-level paging entry in Nested translation",
1721         "SM: Error attempting to access second-level paging entry",
1722         "SM: Read/Write permission error in second-level paging entry",
1723         "SM: Non-zero reserved field set in second-level paging entry",
1724         "SM: Invalid second-level page table pointer",
1725         "SM: A/D bit update needed in second-level entry when set up in no snoop",
1726         "Unknown", "Unknown", "Unknown", /* 0x7D-0x7F */
1727         "SM: Address in first-level translation is not canonical",
1728         "SM: U/S set 0 for first-level translation with user privilege",
1729         "SM: No execute permission for request with PASID and ER=1",
1730         "SM: Address beyond the DMA hardware max",
1731         "SM: Second-level entry address beyond the max",
1732         "SM: No write permission for Write/AtomicOp request",
1733         "SM: No read permission for Read/AtomicOp request",
1734         "SM: Invalid address-interrupt address",
1735         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x88-0x8F */
1736         "SM: A/D bit update needed in first-level entry when set up in no snoop",
1737 };
1738
static const char * const irq_remap_fault_reasons[] = {
1741         "Detected reserved fields in the decoded interrupt-remapped request",
1742         "Interrupt index exceeded the interrupt-remapping table size",
1743         "Present field in the IRTE entry is clear",
        "Error accessing interrupt-remapping table pointed to by IRTA_REG",
1745         "Detected reserved fields in the IRTE entry",
1746         "Blocked a compatibility format interrupt request",
1747         "Blocked an interrupt request due to source-id verification failure",
1748 };
1749
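/*
 * Map a fault reason code to a human-readable string: codes starting at 0x20
 * are interrupt-remapping faults, codes starting at 0x30 are scalable-mode
 * DMA-remapping faults, and lower codes are legacy DMA-remapping faults.
 */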
1750 static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1751 {
1752         if (fault_reason >= 0x20 && (fault_reason - 0x20 <
1753                                         ARRAY_SIZE(irq_remap_fault_reasons))) {
1754                 *fault_type = INTR_REMAP;
1755                 return irq_remap_fault_reasons[fault_reason - 0x20];
1756         } else if (fault_reason >= 0x30 && (fault_reason - 0x30 <
1757                         ARRAY_SIZE(dma_remap_sm_fault_reasons))) {
1758                 *fault_type = DMA_REMAP;
1759                 return dma_remap_sm_fault_reasons[fault_reason - 0x30];
1760         } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1761                 *fault_type = DMA_REMAP;
1762                 return dma_remap_fault_reasons[fault_reason];
1763         } else {
1764                 *fault_type = UNKNOWN;
1765                 return "Unknown";
1766         }
1767 }
1768
static inline int dmar_msi_reg(struct intel_iommu *iommu, int irq)
1771 {
1772         if (iommu->irq == irq)
1773                 return DMAR_FECTL_REG;
1774         else if (iommu->pr_irq == irq)
1775                 return DMAR_PECTL_REG;
1776         else
1777                 BUG();
1778 }
1779
1780 void dmar_msi_unmask(struct irq_data *data)
1781 {
1782         struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1783         int reg = dmar_msi_reg(iommu, data->irq);
1784         unsigned long flag;
1785
1786         /* unmask it */
1787         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1788         writel(0, iommu->reg + reg);
        /* Read back the register to flush the posted write */
1790         readl(iommu->reg + reg);
1791         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1792 }
1793
1794 void dmar_msi_mask(struct irq_data *data)
1795 {
1796         struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1797         int reg = dmar_msi_reg(iommu, data->irq);
1798         unsigned long flag;
1799
1800         /* mask it */
1801         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1802         writel(DMA_FECTL_IM, iommu->reg + reg);
        /* Read back the register to flush the posted write */
1804         readl(iommu->reg + reg);
1805         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1806 }
1807
1808 void dmar_msi_write(int irq, struct msi_msg *msg)
1809 {
1810         struct intel_iommu *iommu = irq_get_handler_data(irq);
1811         int reg = dmar_msi_reg(iommu, irq);
1812         unsigned long flag;
1813
1814         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1815         writel(msg->data, iommu->reg + reg + 4);
1816         writel(msg->address_lo, iommu->reg + reg + 8);
1817         writel(msg->address_hi, iommu->reg + reg + 12);
1818         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1819 }
1820
1821 void dmar_msi_read(int irq, struct msi_msg *msg)
1822 {
1823         struct intel_iommu *iommu = irq_get_handler_data(irq);
1824         int reg = dmar_msi_reg(iommu, irq);
1825         unsigned long flag;
1826
1827         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1828         msg->data = readl(iommu->reg + reg + 4);
1829         msg->address_lo = readl(iommu->reg + reg + 8);
1830         msg->address_hi = readl(iommu->reg + reg + 12);
1831         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1832 }
1833
1834 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1835                 u8 fault_reason, u32 pasid, u16 source_id,
1836                 unsigned long long addr)
1837 {
1838         const char *reason;
1839         int fault_type;
1840
1841         reason = dmar_get_fault_reason(fault_reason, &fault_type);
1842
1843         if (fault_type == INTR_REMAP)
1844                 pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index %llx [fault reason %02d] %s\n",
1845                         source_id >> 8, PCI_SLOT(source_id & 0xFF),
1846                         PCI_FUNC(source_id & 0xFF), addr >> 48,
1847                         fault_reason, reason);
1848         else
1849                 pr_err("[%s] Request device [%02x:%02x.%d] PASID %x fault addr %llx [fault reason %02d] %s\n",
1850                        type ? "DMA Read" : "DMA Write",
1851                        source_id >> 8, PCI_SLOT(source_id & 0xFF),
1852                        PCI_FUNC(source_id & 0xFF), pasid, addr,
1853                        fault_reason, reason);
1854         return 0;
1855 }
1856
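/* Each primary fault recording register is 128 bits (16 bytes) wide. */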
1857 #define PRIMARY_FAULT_REG_LEN (16)
1858 irqreturn_t dmar_fault(int irq, void *dev_id)
1859 {
1860         struct intel_iommu *iommu = dev_id;
1861         int reg, fault_index;
1862         u32 fault_status;
1863         unsigned long flag;
1864         static DEFINE_RATELIMIT_STATE(rs,
1865                                       DEFAULT_RATELIMIT_INTERVAL,
1866                                       DEFAULT_RATELIMIT_BURST);
1867
1868         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1869         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1870         if (fault_status && __ratelimit(&rs))
1871                 pr_err("DRHD: handling fault status reg %x\n", fault_status);
1872
1873         /* TBD: ignore advanced fault log currently */
1874         if (!(fault_status & DMA_FSTS_PPF))
1875                 goto unlock_exit;
1876
1877         fault_index = dma_fsts_fault_record_index(fault_status);
1878         reg = cap_fault_reg_offset(iommu->cap);
1879         while (1) {
1880                 /* Disable printing, simply clear the fault when ratelimited */
1881                 bool ratelimited = !__ratelimit(&rs);
1882                 u8 fault_reason;
1883                 u16 source_id;
1884                 u64 guest_addr;
1885                 u32 pasid;
1886                 int type;
1887                 u32 data;
1888                 bool pasid_present;
1889
1890                 /* highest 32 bits */
1891                 data = readl(iommu->reg + reg +
1892                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1893                 if (!(data & DMA_FRCD_F))
1894                         break;
1895
1896                 if (!ratelimited) {
1897                         fault_reason = dma_frcd_fault_reason(data);
1898                         type = dma_frcd_type(data);
1899
1900                         pasid = dma_frcd_pasid_value(data);
1901                         data = readl(iommu->reg + reg +
1902                                      fault_index * PRIMARY_FAULT_REG_LEN + 8);
1903                         source_id = dma_frcd_source_id(data);
1904
1905                         pasid_present = dma_frcd_pasid_present(data);
1906                         guest_addr = dmar_readq(iommu->reg + reg +
1907                                         fault_index * PRIMARY_FAULT_REG_LEN);
1908                         guest_addr = dma_frcd_page_addr(guest_addr);
1909                 }
1910
1911                 /* clear the fault */
1912                 writel(DMA_FRCD_F, iommu->reg + reg +
1913                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
1914
1915                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1916
1917                 if (!ratelimited)
                        /* Use PASID -1 if no PASID is present */
1919                         dmar_fault_do_one(iommu, type, fault_reason,
1920                                           pasid_present ? pasid : -1,
1921                                           source_id, guest_addr);
1922
1923                 fault_index++;
1924                 if (fault_index >= cap_num_fault_regs(iommu->cap))
1925                         fault_index = 0;
1926                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1927         }
1928
1929         writel(DMA_FSTS_PFO | DMA_FSTS_PPF | DMA_FSTS_PRO,
1930                iommu->reg + DMAR_FSTS_REG);
1931
1932 unlock_exit:
1933         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1934         return IRQ_HANDLED;
1935 }
1936
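/*
 * Allocate a fault-reporting interrupt for @iommu (if not already done)
 * and install dmar_fault() as its handler.
 */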
1937 int dmar_set_interrupt(struct intel_iommu *iommu)
1938 {
1939         int irq, ret;
1940
1941         /*
1942          * Check if the fault interrupt is already initialized.
1943          */
1944         if (iommu->irq)
1945                 return 0;
1946
1947         irq = dmar_alloc_hwirq(iommu->seq_id, iommu->node, iommu);
1948         if (irq > 0) {
1949                 iommu->irq = irq;
1950         } else {
1951                 pr_err("No free IRQ vectors\n");
1952                 return -EINVAL;
1953         }
1954
1955         ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
1956         if (ret)
1957                 pr_err("Can't request irq\n");
1958         return ret;
1959 }
1960
1961 int __init enable_drhd_fault_handling(void)
1962 {
1963         struct dmar_drhd_unit *drhd;
1964         struct intel_iommu *iommu;
1965
1966         /*
1967          * Enable fault control interrupt.
1968          */
1969         for_each_iommu(iommu, drhd) {
1970                 u32 fault_status;
1971                 int ret = dmar_set_interrupt(iommu);
1972
1973                 if (ret) {
                        pr_err("DRHD %Lx: failed to enable fault interrupt, ret %d\n",
1975                                (unsigned long long)drhd->reg_base_addr, ret);
1976                         return -1;
1977                 }
1978
1979                 /*
1980                  * Clear any previous faults.
1981                  */
1982                 dmar_fault(iommu->irq, iommu);
1983                 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1984                 writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1985         }
1986
1987         return 0;
1988 }
1989
1990 /*
1991  * Re-enable Queued Invalidation interface.
1992  */
1993 int dmar_reenable_qi(struct intel_iommu *iommu)
1994 {
1995         if (!ecap_qis(iommu->ecap))
1996                 return -ENOENT;
1997
1998         if (!iommu->qi)
1999                 return -ENOENT;
2000
2001         /*
2002          * First disable queued invalidation.
2003          */
2004         dmar_disable_qi(iommu);
2005         /*
2006          * Then enable queued invalidation again. Since there is no pending
2007          * invalidation requests now, it's safe to re-enable queued
2008          * invalidation.
2009          */
2010         __dmar_enable_qi(iommu);
2011
2012         return 0;
2013 }
2014
2015 /*
2016  * Check interrupt remapping support in DMAR table description.
2017  */
2018 int __init dmar_ir_support(void)
2019 {
        struct acpi_table_dmar *dmar = (struct acpi_table_dmar *)dmar_tbl;

2022         if (!dmar)
2023                 return 0;
2024         return dmar->flags & 0x1;
2025 }
2026
2027 /* Check whether DMAR units are in use */
2028 static inline bool dmar_in_use(void)
2029 {
2030         return irq_remapping_enabled || intel_iommu_enabled;
2031 }
2032
2033 static int __init dmar_free_unused_resources(void)
2034 {
2035         struct dmar_drhd_unit *dmaru, *dmaru_n;
2036
2037         if (dmar_in_use())
2038                 return 0;
2039
2040         if (dmar_dev_scope_status != 1 && !list_empty(&dmar_drhd_units))
2041                 bus_unregister_notifier(&pci_bus_type, &dmar_pci_bus_nb);
2042
2043         down_write(&dmar_global_lock);
2044         list_for_each_entry_safe(dmaru, dmaru_n, &dmar_drhd_units, list) {
2045                 list_del(&dmaru->list);
2046                 dmar_free_drhd(dmaru);
2047         }
2048         up_write(&dmar_global_lock);
2049
2050         return 0;
2051 }
2052
2053 late_initcall(dmar_free_unused_resources);
2054 IOMMU_INIT_POST(detect_intel_iommu);
2055
2056 /*
2057  * DMAR Hotplug Support
2058  * For more details, please refer to Intel(R) Virtualization Technology
 * for Directed I/O Architecture Specification, Rev 2.2, Section 8.8
2060  * "Remapping Hardware Unit Hot Plug".
2061  */
2062 static guid_t dmar_hp_guid =
2063         GUID_INIT(0xD8C1A3A6, 0xBE9B, 0x4C9B,
2064                   0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF);
2065
2066 /*
 * Currently there's only one revision and the BIOS will not check the
 * revision ID, so use 0 for safety.
2069  */
2070 #define DMAR_DSM_REV_ID                 0
2071 #define DMAR_DSM_FUNC_DRHD              1
2072 #define DMAR_DSM_FUNC_ATSR              2
2073 #define DMAR_DSM_FUNC_RHSA              3
2074
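/*
 * Check whether the firmware's DMAR hotplug _DSM advertises support for
 * the given function.
 */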
2075 static inline bool dmar_detect_dsm(acpi_handle handle, int func)
2076 {
2077         return acpi_check_dsm(handle, &dmar_hp_guid, DMAR_DSM_REV_ID, 1 << func);
2078 }
2079
2080 static int dmar_walk_dsm_resource(acpi_handle handle, int func,
2081                                   dmar_res_handler_t handler, void *arg)
2082 {
2083         int ret = -ENODEV;
2084         union acpi_object *obj;
2085         struct acpi_dmar_header *start;
2086         struct dmar_res_callback callback;
2087         static int res_type[] = {
2088                 [DMAR_DSM_FUNC_DRHD] = ACPI_DMAR_TYPE_HARDWARE_UNIT,
2089                 [DMAR_DSM_FUNC_ATSR] = ACPI_DMAR_TYPE_ROOT_ATS,
2090                 [DMAR_DSM_FUNC_RHSA] = ACPI_DMAR_TYPE_HARDWARE_AFFINITY,
2091         };
2092
2093         if (!dmar_detect_dsm(handle, func))
2094                 return 0;
2095
2096         obj = acpi_evaluate_dsm_typed(handle, &dmar_hp_guid, DMAR_DSM_REV_ID,
2097                                       func, NULL, ACPI_TYPE_BUFFER);
2098         if (!obj)
2099                 return -ENODEV;
2100
2101         memset(&callback, 0, sizeof(callback));
2102         callback.cb[res_type[func]] = handler;
2103         callback.arg[res_type[func]] = arg;
2104         start = (struct acpi_dmar_header *)obj->buffer.pointer;
2105         ret = dmar_walk_remapping_entries(start, obj->buffer.length, &callback);
2106
2107         ACPI_FREE(obj);
2108
2109         return ret;
2110 }
2111
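/*
 * Bring a hot-added DMAR unit online: enable interrupt remapping first,
 * then DMA remapping.
 */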
2112 static int dmar_hp_add_drhd(struct acpi_dmar_header *header, void *arg)
2113 {
2114         int ret;
2115         struct dmar_drhd_unit *dmaru;
2116
2117         dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2118         if (!dmaru)
2119                 return -ENODEV;
2120
2121         ret = dmar_ir_hotplug(dmaru, true);
2122         if (ret == 0)
2123                 ret = dmar_iommu_hotplug(dmaru, true);
2124
2125         return ret;
2126 }
2127
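/*
 * Take a hot-removed DMAR unit offline. Fails with -EBUSY if any device
 * in the unit's scope is still present.
 */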
2128 static int dmar_hp_remove_drhd(struct acpi_dmar_header *header, void *arg)
2129 {
2130         int i, ret;
2131         struct device *dev;
2132         struct dmar_drhd_unit *dmaru;
2133
2134         dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2135         if (!dmaru)
2136                 return 0;
2137
2138         /*
2139          * All PCI devices managed by this unit should have been destroyed.
2140          */
2141         if (!dmaru->include_all && dmaru->devices && dmaru->devices_cnt) {
2142                 for_each_active_dev_scope(dmaru->devices,
2143                                           dmaru->devices_cnt, i, dev)
2144                         return -EBUSY;
2145         }
2146
2147         ret = dmar_ir_hotplug(dmaru, false);
2148         if (ret == 0)
2149                 ret = dmar_iommu_hotplug(dmaru, false);
2150
2151         return ret;
2152 }
2153
2154 static int dmar_hp_release_drhd(struct acpi_dmar_header *header, void *arg)
2155 {
2156         struct dmar_drhd_unit *dmaru;
2157
2158         dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2159         if (dmaru) {
2160                 list_del_rcu(&dmaru->list);
2161                 synchronize_rcu();
2162                 dmar_free_drhd(dmaru);
2163         }
2164
2165         return 0;
2166 }
2167
2168 static int dmar_hotplug_insert(acpi_handle handle)
2169 {
2170         int ret;
2171         int drhd_count = 0;
2172
2173         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2174                                      &dmar_validate_one_drhd, (void *)1);
2175         if (ret)
2176                 goto out;
2177
2178         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2179                                      &dmar_parse_one_drhd, (void *)&drhd_count);
2180         if (ret == 0 && drhd_count == 0) {
2181                 pr_warn(FW_BUG "No DRHD structures in buffer returned by _DSM method\n");
2182                 goto out;
2183         } else if (ret) {
2184                 goto release_drhd;
2185         }
2186
2187         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_RHSA,
2188                                      &dmar_parse_one_rhsa, NULL);
2189         if (ret)
2190                 goto release_drhd;
2191
2192         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2193                                      &dmar_parse_one_atsr, NULL);
2194         if (ret)
2195                 goto release_atsr;
2196
2197         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2198                                      &dmar_hp_add_drhd, NULL);
2199         if (!ret)
2200                 return 0;
2201
2202         dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2203                                &dmar_hp_remove_drhd, NULL);
2204 release_atsr:
2205         dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2206                                &dmar_release_one_atsr, NULL);
2207 release_drhd:
2208         dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2209                                &dmar_hp_release_drhd, NULL);
2210 out:
2211         return ret;
2212 }
2213
2214 static int dmar_hotplug_remove(acpi_handle handle)
2215 {
2216         int ret;
2217
2218         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2219                                      &dmar_check_one_atsr, NULL);
2220         if (ret)
2221                 return ret;
2222
2223         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2224                                      &dmar_hp_remove_drhd, NULL);
2225         if (ret == 0) {
2226                 WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2227                                                &dmar_release_one_atsr, NULL));
2228                 WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2229                                                &dmar_hp_release_drhd, NULL));
2230         } else {
2231                 dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2232                                        &dmar_hp_add_drhd, NULL);
2233         }
2234
2235         return ret;
2236 }
2237
2238 static acpi_status dmar_get_dsm_handle(acpi_handle handle, u32 lvl,
2239                                        void *context, void **retval)
2240 {
2241         acpi_handle *phdl = retval;
2242
2243         if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2244                 *phdl = handle;
2245                 return AE_CTRL_TERMINATE;
2246         }
2247
2248         return AE_OK;
2249 }
2250
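/*
 * Common helper for DMAR hot-add/hot-remove: locate the ACPI handle that
 * implements the DMAR _DSM (either @handle itself or a descendant) and
 * perform the insertion or removal under dmar_global_lock.
 */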
2251 static int dmar_device_hotplug(acpi_handle handle, bool insert)
2252 {
2253         int ret;
2254         acpi_handle tmp = NULL;
2255         acpi_status status;
2256
2257         if (!dmar_in_use())
2258                 return 0;
2259
2260         if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2261                 tmp = handle;
2262         } else {
2263                 status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle,
2264                                              ACPI_UINT32_MAX,
2265                                              dmar_get_dsm_handle,
2266                                              NULL, NULL, &tmp);
2267                 if (ACPI_FAILURE(status)) {
2268                         pr_warn("Failed to locate _DSM method.\n");
2269                         return -ENXIO;
2270                 }
2271         }
        if (!tmp)
2273                 return 0;
2274
2275         down_write(&dmar_global_lock);
2276         if (insert)
2277                 ret = dmar_hotplug_insert(tmp);
2278         else
2279                 ret = dmar_hotplug_remove(tmp);
2280         up_write(&dmar_global_lock);
2281
2282         return ret;
2283 }
2284
2285 int dmar_device_add(acpi_handle handle)
2286 {
2287         return dmar_device_hotplug(handle, true);
2288 }
2289
2290 int dmar_device_remove(acpi_handle handle)
2291 {
2292         return dmar_device_hotplug(handle, false);
2293 }
2294
2295 /*
2296  * dmar_platform_optin - Is %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in DMAR table
2297  *
2298  * Returns true if the platform has %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in
2299  * the ACPI DMAR table. This means that the platform boot firmware has made
2300  * sure no device can issue DMA outside of RMRR regions.
2301  */
2302 bool dmar_platform_optin(void)
2303 {
2304         struct acpi_table_dmar *dmar;
2305         acpi_status status;
2306         bool ret;
2307
2308         status = acpi_get_table(ACPI_SIG_DMAR, 0,
2309                                 (struct acpi_table_header **)&dmar);
2310         if (ACPI_FAILURE(status))
2311                 return false;
2312
2313         ret = !!(dmar->flags & DMAR_PLATFORM_OPT_IN);
2314         acpi_put_table((struct acpi_table_header *)dmar);
2315
2316         return ret;
2317 }
2318 EXPORT_SYMBOL_GPL(dmar_platform_optin);