drivers/iommu/intel/dmar.c (Linux 6.0-rc1)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2006, Intel Corporation.
4  *
5  * Copyright (C) 2006-2008 Intel Corporation
6  * Author: Ashok Raj <ashok.raj@intel.com>
7  * Author: Shaohua Li <shaohua.li@intel.com>
8  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
9  *
10  * This file implements early detection/parsing of Remapping Devices
11  * reported to the OS through the BIOS via the DMA remapping reporting
12  * (DMAR) ACPI tables.
13  *
14  * These routines are used by both DMA remapping and interrupt remapping.
15  */
16
17 #define pr_fmt(fmt)     "DMAR: " fmt
18
19 #include <linux/pci.h>
20 #include <linux/dmar.h>
21 #include <linux/iova.h>
22 #include <linux/timer.h>
23 #include <linux/irq.h>
24 #include <linux/interrupt.h>
25 #include <linux/tboot.h>
26 #include <linux/dmi.h>
27 #include <linux/slab.h>
28 #include <linux/iommu.h>
29 #include <linux/numa.h>
30 #include <linux/limits.h>
31 #include <asm/irq_remapping.h>
32
33 #include "iommu.h"
34 #include "../irq_remapping.h"
35 #include "perf.h"
36 #include "trace.h"
37
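/*
 * Table-driven dispatch for ACPI DMAR sub-tables: dmar_walk_remapping_entries()
 * calls cb[type] with arg[type] for each structure it walks, optionally
 * printing the entry first; unknown or unhandled types are skipped or treated
 * as an error depending on ignore_unhandled.
 */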
38 typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *);
39 struct dmar_res_callback {
40         dmar_res_handler_t      cb[ACPI_DMAR_TYPE_RESERVED];
41         void                    *arg[ACPI_DMAR_TYPE_RESERVED];
42         bool                    ignore_unhandled;
43         bool                    print_entry;
44 };
45
46 /*
47  * Assumptions:
48  * 1) The hotplug framework guarantees that a DMAR unit will be hot-added
49  *    before the I/O devices managed by that unit.
50  * 2) The hotplug framework guarantees that a DMAR unit will be hot-removed
51  *    after the I/O devices managed by that unit.
52  * 3) Hotplug events are rare.
53  *
54  * Locking rules for DMA and interrupt remapping related global data structures:
55  * 1) Use dmar_global_lock in process context
56  * 2) Use RCU in interrupt context
57  */
58 DECLARE_RWSEM(dmar_global_lock);
59 LIST_HEAD(dmar_drhd_units);
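/*
 * For example, readers such as dmar_find_matched_drhd_unit() below walk
 * dmar_drhd_units under rcu_read_lock(), while writers such as
 * dmar_pci_bus_notifier() take dmar_global_lock for write before updating
 * the per-unit device scopes.
 */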
60
61 struct acpi_table_header * __initdata dmar_tbl;
62 static int dmar_dev_scope_status = 1;
63 static DEFINE_IDA(dmar_seq_ids);
64
65 static int alloc_iommu(struct dmar_drhd_unit *drhd);
66 static void free_iommu(struct intel_iommu *iommu);
67
68 static void dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
69 {
70         /*
71          * Add the INCLUDE_ALL unit at the tail, so that a scan of the list
72          * will find it at the very end.
73          */
74         if (drhd->include_all)
75                 list_add_tail_rcu(&drhd->list, &dmar_drhd_units);
76         else
77                 list_add_rcu(&drhd->list, &dmar_drhd_units);
78 }
79
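/*
 * Count the device scope entries we track (PCI endpoints, PCI bridges and
 * ACPI namespace devices) and allocate an array of that size. IOAPIC and
 * HPET scope entries are skipped here without a warning; they are handled
 * by the interrupt remapping code instead.
 */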
80 void *dmar_alloc_dev_scope(void *start, void *end, int *cnt)
81 {
82         struct acpi_dmar_device_scope *scope;
83
84         *cnt = 0;
85         while (start < end) {
86                 scope = start;
87                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_NAMESPACE ||
88                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
89                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
90                         (*cnt)++;
91                 else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC &&
92                         scope->entry_type != ACPI_DMAR_SCOPE_TYPE_HPET) {
93                         pr_warn("Unsupported device scope\n");
94                 }
95                 start += scope->length;
96         }
97         if (*cnt == 0)
98                 return NULL;
99
100         return kcalloc(*cnt, sizeof(struct dmar_dev_scope), GFP_KERNEL);
101 }
102
103 void dmar_free_dev_scope(struct dmar_dev_scope **devices, int *cnt)
104 {
105         int i;
106         struct device *tmp_dev;
107
108         if (*devices && *cnt) {
109                 for_each_active_dev_scope(*devices, *cnt, i, tmp_dev)
110                         put_device(tmp_dev);
111                 kfree(*devices);
112         }
113
114         *devices = NULL;
115         *cnt = 0;
116 }
117
118 /* Optimize out kzalloc()/kfree() for normal cases */
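/*
 * In the common case struct_size(info, path, level) fits within this 64-byte
 * buffer, so dmar_alloc_pci_notify_info() avoids a heap allocation; larger
 * notifications fall back to kzalloc() and are released again in
 * dmar_free_pci_notify_info().
 */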
119 static char dmar_pci_notify_info_buf[64];
120
121 static struct dmar_pci_notify_info *
122 dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event)
123 {
124         int level = 0;
125         size_t size;
126         struct pci_dev *tmp;
127         struct dmar_pci_notify_info *info;
128
129         BUG_ON(dev->is_virtfn);
130
131         /*
132          * Ignore devices that have a domain number higher than what can
133          * be looked up in DMAR, e.g. VMD subdevices with domain 0x10000
134          */
135         if (pci_domain_nr(dev->bus) > U16_MAX)
136                 return NULL;
137
138         /* Only generate path[] for device addition event */
139         if (event == BUS_NOTIFY_ADD_DEVICE)
140                 for (tmp = dev; tmp; tmp = tmp->bus->self)
141                         level++;
142
143         size = struct_size(info, path, level);
144         if (size <= sizeof(dmar_pci_notify_info_buf)) {
145                 info = (struct dmar_pci_notify_info *)dmar_pci_notify_info_buf;
146         } else {
147                 info = kzalloc(size, GFP_KERNEL);
148                 if (!info) {
149                         if (dmar_dev_scope_status == 0)
150                                 dmar_dev_scope_status = -ENOMEM;
151                         return NULL;
152                 }
153         }
154
155         info->event = event;
156         info->dev = dev;
157         info->seg = pci_domain_nr(dev->bus);
158         info->level = level;
159         if (event == BUS_NOTIFY_ADD_DEVICE) {
160                 for (tmp = dev; tmp; tmp = tmp->bus->self) {
161                         level--;
162                         info->path[level].bus = tmp->bus->number;
163                         info->path[level].device = PCI_SLOT(tmp->devfn);
164                         info->path[level].function = PCI_FUNC(tmp->devfn);
165                         if (pci_is_root_bus(tmp->bus))
166                                 info->bus = tmp->bus->number;
167                 }
168         }
169
170         return info;
171 }
172
173 static inline void dmar_free_pci_notify_info(struct dmar_pci_notify_info *info)
174 {
175         if ((void *)info != dmar_pci_notify_info_buf)
176                 kfree(info);
177 }
178
179 static bool dmar_match_pci_path(struct dmar_pci_notify_info *info, int bus,
180                                 struct acpi_dmar_pci_path *path, int count)
181 {
182         int i;
183
184         if (info->bus != bus)
185                 goto fallback;
186         if (info->level != count)
187                 goto fallback;
188
189         for (i = 0; i < count; i++) {
190                 if (path[i].device != info->path[i].device ||
191                     path[i].function != info->path[i].function)
192                         goto fallback;
193         }
194
195         return true;
196
197 fallback:
198
199         if (count != 1)
200                 return false;
201
202         i = info->level - 1;
203         if (bus              == info->path[i].bus &&
204             path[0].device   == info->path[i].device &&
205             path[0].function == info->path[i].function) {
206                 pr_info(FW_BUG "RMRR entry for device %02x:%02x.%x is broken - applying workaround\n",
207                         bus, path[0].device, path[0].function);
208                 return true;
209         }
210
211         return false;
212 }
213
214 /* Return: > 0 if a match is found, 0 if no match is found, < 0 on error */
215 int dmar_insert_dev_scope(struct dmar_pci_notify_info *info,
216                           void *start, void *end, u16 segment,
217                           struct dmar_dev_scope *devices,
218                           int devices_cnt)
219 {
220         int i, level;
221         struct device *tmp, *dev = &info->dev->dev;
222         struct acpi_dmar_device_scope *scope;
223         struct acpi_dmar_pci_path *path;
224
225         if (segment != info->seg)
226                 return 0;
227
228         for (; start < end; start += scope->length) {
229                 scope = start;
230                 if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
231                     scope->entry_type != ACPI_DMAR_SCOPE_TYPE_BRIDGE)
232                         continue;
233
234                 path = (struct acpi_dmar_pci_path *)(scope + 1);
235                 level = (scope->length - sizeof(*scope)) / sizeof(*path);
236                 if (!dmar_match_pci_path(info, scope->bus, path, level))
237                         continue;
238
239                 /*
240                  * We expect devices with endpoint scope to have normal PCI
241                  * headers, and devices with bridge scope to have bridge PCI
242                  * headers.  However PCI NTB devices may be listed in the
243                  * DMAR table with bridge scope, even though they have a
244                  * normal PCI header.  NTB devices are identified by class
245                  * "BRIDGE_OTHER" (0680h) - we don't declare a socpe mismatch
246                  * for this special case.
247                  */
248                 if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
249                      info->dev->hdr_type != PCI_HEADER_TYPE_NORMAL) ||
250                     (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE &&
251                      (info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
252                       info->dev->class >> 16 != PCI_BASE_CLASS_BRIDGE))) {
253                         pr_warn("Device scope type does not match for %s\n",
254                                 pci_name(info->dev));
255                         return -EINVAL;
256                 }
257
258                 for_each_dev_scope(devices, devices_cnt, i, tmp)
259                         if (tmp == NULL) {
260                                 devices[i].bus = info->dev->bus->number;
261                                 devices[i].devfn = info->dev->devfn;
262                                 rcu_assign_pointer(devices[i].dev,
263                                                    get_device(dev));
264                                 return 1;
265                         }
266                 BUG_ON(i >= devices_cnt);
267         }
268
269         return 0;
270 }
271
272 int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, u16 segment,
273                           struct dmar_dev_scope *devices, int count)
274 {
275         int index;
276         struct device *tmp;
277
278         if (info->seg != segment)
279                 return 0;
280
281         for_each_active_dev_scope(devices, count, index, tmp)
282                 if (tmp == &info->dev->dev) {
283                         RCU_INIT_POINTER(devices[index].dev, NULL);
284                         synchronize_rcu();
285                         put_device(tmp);
286                         return 1;
287                 }
288
289         return 0;
290 }
291
292 static int dmar_pci_bus_add_dev(struct dmar_pci_notify_info *info)
293 {
294         int ret = 0;
295         struct dmar_drhd_unit *dmaru;
296         struct acpi_dmar_hardware_unit *drhd;
297
298         for_each_drhd_unit(dmaru) {
299                 if (dmaru->include_all)
300                         continue;
301
302                 drhd = container_of(dmaru->hdr,
303                                     struct acpi_dmar_hardware_unit, header);
304                 ret = dmar_insert_dev_scope(info, (void *)(drhd + 1),
305                                 ((void *)drhd) + drhd->header.length,
306                                 dmaru->segment,
307                                 dmaru->devices, dmaru->devices_cnt);
308                 if (ret)
309                         break;
310         }
311         if (ret >= 0)
312                 ret = dmar_iommu_notify_scope_dev(info);
313         if (ret < 0 && dmar_dev_scope_status == 0)
314                 dmar_dev_scope_status = ret;
315
316         if (ret >= 0)
317                 intel_irq_remap_add_device(info);
318
319         return ret;
320 }
321
322 static void dmar_pci_bus_del_dev(struct dmar_pci_notify_info *info)
323 {
324         struct dmar_drhd_unit *dmaru;
325
326         for_each_drhd_unit(dmaru)
327                 if (dmar_remove_dev_scope(info, dmaru->segment,
328                         dmaru->devices, dmaru->devices_cnt))
329                         break;
330         dmar_iommu_notify_scope_dev(info);
331 }
332
333 static inline void vf_inherit_msi_domain(struct pci_dev *pdev)
334 {
335         struct pci_dev *physfn = pci_physfn(pdev);
336
337         dev_set_msi_domain(&pdev->dev, dev_get_msi_domain(&physfn->dev));
338 }
339
340 static int dmar_pci_bus_notifier(struct notifier_block *nb,
341                                  unsigned long action, void *data)
342 {
343         struct pci_dev *pdev = to_pci_dev(data);
344         struct dmar_pci_notify_info *info;
345
346         /* Only care about add/remove events for physical functions.
347          * For VFs we actually do the lookup based on the corresponding
348          * PF in device_to_iommu() anyway. */
349         if (pdev->is_virtfn) {
350                 /*
351                  * Ensure that the VF device inherits the irq domain of the
352                  * PF device. Ideally the device would inherit the domain
353                  * from the bus, but DMAR can have multiple units per bus
354                  * which makes this impossible. The VF 'bus' could inherit
355                  * from the PF device, but that's yet another x86'sism to
356                  * inflict on everybody else.
357                  */
358                 if (action == BUS_NOTIFY_ADD_DEVICE)
359                         vf_inherit_msi_domain(pdev);
360                 return NOTIFY_DONE;
361         }
362
363         if (action != BUS_NOTIFY_ADD_DEVICE &&
364             action != BUS_NOTIFY_REMOVED_DEVICE)
365                 return NOTIFY_DONE;
366
367         info = dmar_alloc_pci_notify_info(pdev, action);
368         if (!info)
369                 return NOTIFY_DONE;
370
371         down_write(&dmar_global_lock);
372         if (action == BUS_NOTIFY_ADD_DEVICE)
373                 dmar_pci_bus_add_dev(info);
374         else if (action == BUS_NOTIFY_REMOVED_DEVICE)
375                 dmar_pci_bus_del_dev(info);
376         up_write(&dmar_global_lock);
377
378         dmar_free_pci_notify_info(info);
379
380         return NOTIFY_OK;
381 }
382
383 static struct notifier_block dmar_pci_bus_nb = {
384         .notifier_call = dmar_pci_bus_notifier,
385         .priority = 1,
386 };
387
388 static struct dmar_drhd_unit *
389 dmar_find_dmaru(struct acpi_dmar_hardware_unit *drhd)
390 {
391         struct dmar_drhd_unit *dmaru;
392
393         list_for_each_entry_rcu(dmaru, &dmar_drhd_units, list,
394                                 dmar_rcu_check())
395                 if (dmaru->segment == drhd->segment &&
396                     dmaru->reg_base_addr == drhd->address)
397                         return dmaru;
398
399         return NULL;
400 }
401
402 /*
403  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
404  * structure, which uniquely represents one DMA remapping hardware unit
405  * present in the platform
406  */
407 static int dmar_parse_one_drhd(struct acpi_dmar_header *header, void *arg)
408 {
409         struct acpi_dmar_hardware_unit *drhd;
410         struct dmar_drhd_unit *dmaru;
411         int ret;
412
413         drhd = (struct acpi_dmar_hardware_unit *)header;
414         dmaru = dmar_find_dmaru(drhd);
415         if (dmaru)
416                 goto out;
417
418         dmaru = kzalloc(sizeof(*dmaru) + header->length, GFP_KERNEL);
419         if (!dmaru)
420                 return -ENOMEM;
421
422         /*
423          * If the header was allocated from the slab by an ACPI _DSM method, we
424          * must copy its content because the memory buffer will be freed on return.
425          */
426         dmaru->hdr = (void *)(dmaru + 1);
427         memcpy(dmaru->hdr, header, header->length);
428         dmaru->reg_base_addr = drhd->address;
429         dmaru->segment = drhd->segment;
430         dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
431         dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1),
432                                               ((void *)drhd) + drhd->header.length,
433                                               &dmaru->devices_cnt);
434         if (dmaru->devices_cnt && dmaru->devices == NULL) {
435                 kfree(dmaru);
436                 return -ENOMEM;
437         }
438
439         ret = alloc_iommu(dmaru);
440         if (ret) {
441                 dmar_free_dev_scope(&dmaru->devices,
442                                     &dmaru->devices_cnt);
443                 kfree(dmaru);
444                 return ret;
445         }
446         dmar_register_drhd_unit(dmaru);
447
448 out:
449         if (arg)
450                 (*(int *)arg)++;
451
452         return 0;
453 }
454
455 static void dmar_free_drhd(struct dmar_drhd_unit *dmaru)
456 {
457         if (dmaru->devices && dmaru->devices_cnt)
458                 dmar_free_dev_scope(&dmaru->devices, &dmaru->devices_cnt);
459         if (dmaru->iommu)
460                 free_iommu(dmaru->iommu);
461         kfree(dmaru);
462 }
463
464 static int __init dmar_parse_one_andd(struct acpi_dmar_header *header,
465                                       void *arg)
466 {
467         struct acpi_dmar_andd *andd = (void *)header;
468
469         /* Check for NUL termination within the designated length */
470         if (strnlen(andd->device_name, header->length - 8) == header->length - 8) {
471                 pr_warn(FW_BUG
472                            "Your BIOS is broken; ANDD object name is not NUL-terminated\n"
473                            "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
474                            dmi_get_system_info(DMI_BIOS_VENDOR),
475                            dmi_get_system_info(DMI_BIOS_VERSION),
476                            dmi_get_system_info(DMI_PRODUCT_VERSION));
477                 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
478                 return -EINVAL;
479         }
480         pr_info("ANDD device: %x name: %s\n", andd->device_number,
481                 andd->device_name);
482
483         return 0;
484 }
485
486 #ifdef CONFIG_ACPI_NUMA
487 static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg)
488 {
489         struct acpi_dmar_rhsa *rhsa;
490         struct dmar_drhd_unit *drhd;
491
492         rhsa = (struct acpi_dmar_rhsa *)header;
493         for_each_drhd_unit(drhd) {
494                 if (drhd->reg_base_addr == rhsa->base_address) {
495                         int node = pxm_to_node(rhsa->proximity_domain);
496
497                         if (node != NUMA_NO_NODE && !node_online(node))
498                                 node = NUMA_NO_NODE;
499                         drhd->iommu->node = node;
500                         return 0;
501                 }
502         }
503         pr_warn(FW_BUG
504                 "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
505                 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
506                 rhsa->base_address,
507                 dmi_get_system_info(DMI_BIOS_VENDOR),
508                 dmi_get_system_info(DMI_BIOS_VERSION),
509                 dmi_get_system_info(DMI_PRODUCT_VERSION));
510         add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
511
512         return 0;
513 }
514 #else
515 #define dmar_parse_one_rhsa             dmar_res_noop
516 #endif
517
518 static void
519 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
520 {
521         struct acpi_dmar_hardware_unit *drhd;
522         struct acpi_dmar_reserved_memory *rmrr;
523         struct acpi_dmar_atsr *atsr;
524         struct acpi_dmar_rhsa *rhsa;
525         struct acpi_dmar_satc *satc;
526
527         switch (header->type) {
528         case ACPI_DMAR_TYPE_HARDWARE_UNIT:
529                 drhd = container_of(header, struct acpi_dmar_hardware_unit,
530                                     header);
531                 pr_info("DRHD base: %#016Lx flags: %#x\n",
532                         (unsigned long long)drhd->address, drhd->flags);
533                 break;
534         case ACPI_DMAR_TYPE_RESERVED_MEMORY:
535                 rmrr = container_of(header, struct acpi_dmar_reserved_memory,
536                                     header);
537                 pr_info("RMRR base: %#016Lx end: %#016Lx\n",
538                         (unsigned long long)rmrr->base_address,
539                         (unsigned long long)rmrr->end_address);
540                 break;
541         case ACPI_DMAR_TYPE_ROOT_ATS:
542                 atsr = container_of(header, struct acpi_dmar_atsr, header);
543                 pr_info("ATSR flags: %#x\n", atsr->flags);
544                 break;
545         case ACPI_DMAR_TYPE_HARDWARE_AFFINITY:
546                 rhsa = container_of(header, struct acpi_dmar_rhsa, header);
547                 pr_info("RHSA base: %#016Lx proximity domain: %#x\n",
548                        (unsigned long long)rhsa->base_address,
549                        rhsa->proximity_domain);
550                 break;
551         case ACPI_DMAR_TYPE_NAMESPACE:
552                 /* We don't print this here because we need to sanity-check
553                    it first. So print it in dmar_parse_one_andd() instead. */
554                 break;
555         case ACPI_DMAR_TYPE_SATC:
556                 satc = container_of(header, struct acpi_dmar_satc, header);
557                 pr_info("SATC flags: 0x%x\n", satc->flags);
558                 break;
559         }
560 }
561
562 /**
563  * dmar_table_detect - checks to see if the platform supports DMAR devices
564  */
565 static int __init dmar_table_detect(void)
566 {
567         acpi_status status = AE_OK;
568
569         /* If we can find the DMAR table, then there are DMAR devices */
570         status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl);
571
572         if (ACPI_SUCCESS(status) && !dmar_tbl) {
573                 pr_warn("Unable to map DMAR\n");
574                 status = AE_NOT_FOUND;
575         }
576
577         return ACPI_SUCCESS(status) ? 0 : -ENOENT;
578 }
579
580 static int dmar_walk_remapping_entries(struct acpi_dmar_header *start,
581                                        size_t len, struct dmar_res_callback *cb)
582 {
583         struct acpi_dmar_header *iter, *next;
584         struct acpi_dmar_header *end = ((void *)start) + len;
585
586         for (iter = start; iter < end; iter = next) {
587                 next = (void *)iter + iter->length;
588                 if (iter->length == 0) {
589                         /* Avoid looping forever on bad ACPI tables */
590                         pr_debug(FW_BUG "Invalid 0-length structure\n");
591                         break;
592                 } else if (next > end) {
593                         /* Avoid running past the table end */
594                         pr_warn(FW_BUG "Record passes table end\n");
595                         return -EINVAL;
596                 }
597
598                 if (cb->print_entry)
599                         dmar_table_print_dmar_entry(iter);
600
601                 if (iter->type >= ACPI_DMAR_TYPE_RESERVED) {
602                         /* continue for forward compatibility */
603                         pr_debug("Unknown DMAR structure type %d\n",
604                                  iter->type);
605                 } else if (cb->cb[iter->type]) {
606                         int ret;
607
608                         ret = cb->cb[iter->type](iter, cb->arg[iter->type]);
609                         if (ret)
610                                 return ret;
611                 } else if (!cb->ignore_unhandled) {
612                         pr_warn("No handler for DMAR structure type %d\n",
613                                 iter->type);
614                         return -EINVAL;
615                 }
616         }
617
618         return 0;
619 }
620
621 static inline int dmar_walk_dmar_table(struct acpi_table_dmar *dmar,
622                                        struct dmar_res_callback *cb)
623 {
624         return dmar_walk_remapping_entries((void *)(dmar + 1),
625                         dmar->header.length - sizeof(*dmar), cb);
626 }
627
628 /**
629  * parse_dmar_table - parses the DMA remapping reporting (DMAR) table
630  */
631 static int __init
632 parse_dmar_table(void)
633 {
634         struct acpi_table_dmar *dmar;
635         int drhd_count = 0;
636         int ret;
637         struct dmar_res_callback cb = {
638                 .print_entry = true,
639                 .ignore_unhandled = true,
640                 .arg[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &drhd_count,
641                 .cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_parse_one_drhd,
642                 .cb[ACPI_DMAR_TYPE_RESERVED_MEMORY] = &dmar_parse_one_rmrr,
643                 .cb[ACPI_DMAR_TYPE_ROOT_ATS] = &dmar_parse_one_atsr,
644                 .cb[ACPI_DMAR_TYPE_HARDWARE_AFFINITY] = &dmar_parse_one_rhsa,
645                 .cb[ACPI_DMAR_TYPE_NAMESPACE] = &dmar_parse_one_andd,
646                 .cb[ACPI_DMAR_TYPE_SATC] = &dmar_parse_one_satc,
647         };
648
649         /*
650          * Do it again; the earlier dmar_tbl mapping may have been set up
651          * with the fixed map.
652          */
653         dmar_table_detect();
654
655         /*
656          * ACPI tables may not be DMA protected by tboot, so use the DMAR copy
657          * that SINIT saved in SinitMleData in the TXT heap (which is DMA protected).
658          */
659         dmar_tbl = tboot_get_dmar_table(dmar_tbl);
660
661         dmar = (struct acpi_table_dmar *)dmar_tbl;
662         if (!dmar)
663                 return -ENODEV;
664
665         if (dmar->width < PAGE_SHIFT - 1) {
666                 pr_warn("Invalid DMAR haw\n");
667                 return -EINVAL;
668         }
669
670         pr_info("Host address width %d\n", dmar->width + 1);
671         ret = dmar_walk_dmar_table(dmar, &cb);
672         if (ret == 0 && drhd_count == 0)
673                 pr_warn(FW_BUG "No DRHD structure found in DMAR table\n");
674
675         return ret;
676 }
677
678 static int dmar_pci_device_match(struct dmar_dev_scope devices[],
679                                  int cnt, struct pci_dev *dev)
680 {
681         int index;
682         struct device *tmp;
683
684         while (dev) {
685                 for_each_active_dev_scope(devices, cnt, index, tmp)
686                         if (dev_is_pci(tmp) && dev == to_pci_dev(tmp))
687                                 return 1;
688
689                 /* Check our parent */
690                 dev = dev->bus->self;
691         }
692
693         return 0;
694 }
695
696 struct dmar_drhd_unit *
697 dmar_find_matched_drhd_unit(struct pci_dev *dev)
698 {
699         struct dmar_drhd_unit *dmaru;
700         struct acpi_dmar_hardware_unit *drhd;
701
702         dev = pci_physfn(dev);
703
704         rcu_read_lock();
705         for_each_drhd_unit(dmaru) {
706                 drhd = container_of(dmaru->hdr,
707                                     struct acpi_dmar_hardware_unit,
708                                     header);
709
710                 if (dmaru->include_all &&
711                     drhd->segment == pci_domain_nr(dev->bus))
712                         goto out;
713
714                 if (dmar_pci_device_match(dmaru->devices,
715                                           dmaru->devices_cnt, dev))
716                         goto out;
717         }
718         dmaru = NULL;
719 out:
720         rcu_read_unlock();
721
722         return dmaru;
723 }
724
725 static void __init dmar_acpi_insert_dev_scope(u8 device_number,
726                                               struct acpi_device *adev)
727 {
728         struct dmar_drhd_unit *dmaru;
729         struct acpi_dmar_hardware_unit *drhd;
730         struct acpi_dmar_device_scope *scope;
731         struct device *tmp;
732         int i;
733         struct acpi_dmar_pci_path *path;
734
735         for_each_drhd_unit(dmaru) {
736                 drhd = container_of(dmaru->hdr,
737                                     struct acpi_dmar_hardware_unit,
738                                     header);
739
740                 for (scope = (void *)(drhd + 1);
741                      (unsigned long)scope < ((unsigned long)drhd) + drhd->header.length;
742                      scope = ((void *)scope) + scope->length) {
743                         if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_NAMESPACE)
744                                 continue;
745                         if (scope->enumeration_id != device_number)
746                                 continue;
747
748                         path = (void *)(scope + 1);
749                         pr_info("ACPI device \"%s\" under DMAR at %llx as %02x:%02x.%d\n",
750                                 dev_name(&adev->dev), dmaru->reg_base_addr,
751                                 scope->bus, path->device, path->function);
752                         for_each_dev_scope(dmaru->devices, dmaru->devices_cnt, i, tmp)
753                                 if (tmp == NULL) {
754                                         dmaru->devices[i].bus = scope->bus;
755                                         dmaru->devices[i].devfn = PCI_DEVFN(path->device,
756                                                                             path->function);
757                                         rcu_assign_pointer(dmaru->devices[i].dev,
758                                                            get_device(&adev->dev));
759                                         return;
760                                 }
761                         BUG_ON(i >= dmaru->devices_cnt);
762                 }
763         }
764         pr_warn("No IOMMU scope found for ANDD enumeration ID %d (%s)\n",
765                 device_number, dev_name(&adev->dev));
766 }
767
768 static int __init dmar_acpi_dev_scope_init(void)
769 {
770         struct acpi_dmar_andd *andd;
771
772         if (dmar_tbl == NULL)
773                 return -ENODEV;
774
775         for (andd = (void *)dmar_tbl + sizeof(struct acpi_table_dmar);
776              ((unsigned long)andd) < ((unsigned long)dmar_tbl) + dmar_tbl->length;
777              andd = ((void *)andd) + andd->header.length) {
778                 if (andd->header.type == ACPI_DMAR_TYPE_NAMESPACE) {
779                         acpi_handle h;
780                         struct acpi_device *adev;
781
782                         if (!ACPI_SUCCESS(acpi_get_handle(ACPI_ROOT_OBJECT,
783                                                           andd->device_name,
784                                                           &h))) {
785                                 pr_err("Failed to find handle for ACPI object %s\n",
786                                        andd->device_name);
787                                 continue;
788                         }
789                         adev = acpi_fetch_acpi_dev(h);
790                         if (!adev) {
791                                 pr_err("Failed to get device for ACPI object %s\n",
792                                        andd->device_name);
793                                 continue;
794                         }
795                         dmar_acpi_insert_dev_scope(andd->device_number, adev);
796                 }
797         }
798         return 0;
799 }
800
801 int __init dmar_dev_scope_init(void)
802 {
803         struct pci_dev *dev = NULL;
804         struct dmar_pci_notify_info *info;
805
806         if (dmar_dev_scope_status != 1)
807                 return dmar_dev_scope_status;
808
809         if (list_empty(&dmar_drhd_units)) {
810                 dmar_dev_scope_status = -ENODEV;
811         } else {
812                 dmar_dev_scope_status = 0;
813
814                 dmar_acpi_dev_scope_init();
815
816                 for_each_pci_dev(dev) {
817                         if (dev->is_virtfn)
818                                 continue;
819
820                         info = dmar_alloc_pci_notify_info(dev,
821                                         BUS_NOTIFY_ADD_DEVICE);
822                         if (!info) {
823                                 return dmar_dev_scope_status;
824                         } else {
825                                 dmar_pci_bus_add_dev(info);
826                                 dmar_free_pci_notify_info(info);
827                         }
828                 }
829         }
830
831         return dmar_dev_scope_status;
832 }
833
834 void __init dmar_register_bus_notifier(void)
835 {
836         bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb);
837 }
838
839
840 int __init dmar_table_init(void)
841 {
842         static int dmar_table_initialized;
843         int ret;
844
845         if (dmar_table_initialized == 0) {
846                 ret = parse_dmar_table();
847                 if (ret < 0) {
848                         if (ret != -ENODEV)
849                                 pr_info("Parse DMAR table failure.\n");
850                 } else  if (list_empty(&dmar_drhd_units)) {
851                         pr_info("No DMAR devices found\n");
852                         ret = -ENODEV;
853                 }
854
855                 if (ret < 0)
856                         dmar_table_initialized = ret;
857                 else
858                         dmar_table_initialized = 1;
859         }
860
861         return dmar_table_initialized < 0 ? dmar_table_initialized : 0;
862 }
863
864 static void warn_invalid_dmar(u64 addr, const char *message)
865 {
866         pr_warn_once(FW_BUG
867                 "Your BIOS is broken; DMAR reported at address %llx%s!\n"
868                 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
869                 addr, message,
870                 dmi_get_system_info(DMI_BIOS_VENDOR),
871                 dmi_get_system_info(DMI_BIOS_VERSION),
872                 dmi_get_system_info(DMI_PRODUCT_VERSION));
873         add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
874 }
875
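/*
 * Sanity-check one DRHD entry by reading its capability registers: when arg
 * is NULL (early boot detection via detect_intel_iommu()) the registers are
 * mapped with early_ioremap(), otherwise with the regular ioremap(). Reading
 * all ones from both CAP and ECAP typically means the reported register
 * address is not backed by a real remapping unit.
 */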
876 static int __ref
877 dmar_validate_one_drhd(struct acpi_dmar_header *entry, void *arg)
878 {
879         struct acpi_dmar_hardware_unit *drhd;
880         void __iomem *addr;
881         u64 cap, ecap;
882
883         drhd = (void *)entry;
884         if (!drhd->address) {
885                 warn_invalid_dmar(0, "");
886                 return -EINVAL;
887         }
888
889         if (arg)
890                 addr = ioremap(drhd->address, VTD_PAGE_SIZE);
891         else
892                 addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
893         if (!addr) {
894                 pr_warn("Can't validate DRHD address: %llx\n", drhd->address);
895                 return -EINVAL;
896         }
897
898         cap = dmar_readq(addr + DMAR_CAP_REG);
899         ecap = dmar_readq(addr + DMAR_ECAP_REG);
900
901         if (arg)
902                 iounmap(addr);
903         else
904                 early_iounmap(addr, VTD_PAGE_SIZE);
905
906         if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
907                 warn_invalid_dmar(drhd->address, " returns all ones");
908                 return -EINVAL;
909         }
910
911         return 0;
912 }
913
914 void __init detect_intel_iommu(void)
915 {
916         int ret;
917         struct dmar_res_callback validate_drhd_cb = {
918                 .cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_validate_one_drhd,
919                 .ignore_unhandled = true,
920         };
921
922         down_write(&dmar_global_lock);
923         ret = dmar_table_detect();
924         if (!ret)
925                 ret = dmar_walk_dmar_table((struct acpi_table_dmar *)dmar_tbl,
926                                            &validate_drhd_cb);
927         if (!ret && !no_iommu && !iommu_detected &&
928             (!dmar_disabled || dmar_platform_optin())) {
929                 iommu_detected = 1;
930                 /* Make sure ACS will be enabled */
931                 pci_request_acs();
932         }
933
934 #ifdef CONFIG_X86
935         if (!ret) {
936                 x86_init.iommu.iommu_init = intel_iommu_init;
937                 x86_platform.iommu_shutdown = intel_iommu_shutdown;
938         }
939
940 #endif
941
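        /*
         * Drop the detection-time table mapping; parse_dmar_table() maps the
         * table again via dmar_table_detect() when it is actually parsed.
         */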
942         if (dmar_tbl) {
943                 acpi_put_table(dmar_tbl);
944                 dmar_tbl = NULL;
945         }
946         up_write(&dmar_global_lock);
947 }
948
949 static void unmap_iommu(struct intel_iommu *iommu)
950 {
951         iounmap(iommu->reg);
952         release_mem_region(iommu->reg_phys, iommu->reg_size);
953 }
954
955 /**
956  * map_iommu: map the iommu's registers
957  * @iommu: the iommu to map
958  * @phys_addr: the physical address of the base register
959  *
960  * Memory map the iommu's registers.  Start with a single page, and
961  * possibly expand if that turns out to be insufficient.
962  */
963 static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
964 {
965         int map_size, err = 0;
966
967         iommu->reg_phys = phys_addr;
968         iommu->reg_size = VTD_PAGE_SIZE;
969
970         if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) {
971                 pr_err("Can't reserve memory\n");
972                 err = -EBUSY;
973                 goto out;
974         }
975
976         iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
977         if (!iommu->reg) {
978                 pr_err("Can't map the region\n");
979                 err = -ENOMEM;
980                 goto release;
981         }
982
983         iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
984         iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
985
986         if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
987                 err = -EINVAL;
988                 warn_invalid_dmar(phys_addr, " returns all ones");
989                 goto unmap;
990         }
991         if (ecap_vcs(iommu->ecap))
992                 iommu->vccap = dmar_readq(iommu->reg + DMAR_VCCAP_REG);
993
994         /* the registers might be more than one page */
995         map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
996                          cap_max_fault_reg_offset(iommu->cap));
997         map_size = VTD_PAGE_ALIGN(map_size);
998         if (map_size > iommu->reg_size) {
999                 iounmap(iommu->reg);
1000                 release_mem_region(iommu->reg_phys, iommu->reg_size);
1001                 iommu->reg_size = map_size;
1002                 if (!request_mem_region(iommu->reg_phys, iommu->reg_size,
1003                                         iommu->name)) {
1004                         pr_err("Can't reserve memory\n");
1005                         err = -EBUSY;
1006                         goto out;
1007                 }
1008                 iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
1009                 if (!iommu->reg) {
1010                         pr_err("Can't map the region\n");
1011                         err = -ENOMEM;
1012                         goto release;
1013                 }
1014         }
1015         err = 0;
1016         goto out;
1017
1018 unmap:
1019         iounmap(iommu->reg);
1020 release:
1021         release_mem_region(iommu->reg_phys, iommu->reg_size);
1022 out:
1023         return err;
1024 }
1025
1026 static int alloc_iommu(struct dmar_drhd_unit *drhd)
1027 {
1028         struct intel_iommu *iommu;
1029         u32 ver, sts;
1030         int agaw = -1;
1031         int msagaw = -1;
1032         int err;
1033
1034         if (!drhd->reg_base_addr) {
1035                 warn_invalid_dmar(0, "");
1036                 return -EINVAL;
1037         }
1038
1039         iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
1040         if (!iommu)
1041                 return -ENOMEM;
1042
1043         iommu->seq_id = ida_alloc_range(&dmar_seq_ids, 0,
1044                                         DMAR_UNITS_SUPPORTED - 1, GFP_KERNEL);
1045         if (iommu->seq_id < 0) {
1046                 pr_err("Failed to allocate seq_id\n");
1047                 err = iommu->seq_id;
1048                 goto error;
1049         }
1050         sprintf(iommu->name, "dmar%d", iommu->seq_id);
1051
1052         err = map_iommu(iommu, drhd->reg_base_addr);
1053         if (err) {
1054                 pr_err("Failed to map %s\n", iommu->name);
1055                 goto error_free_seq_id;
1056         }
1057
1058         err = -EINVAL;
1059         if (cap_sagaw(iommu->cap) == 0) {
1060                 pr_info("%s: No supported address widths. Not attempting DMA translation.\n",
1061                         iommu->name);
1062                 drhd->ignored = 1;
1063         }
1064
1065         if (!drhd->ignored) {
1066                 agaw = iommu_calculate_agaw(iommu);
1067                 if (agaw < 0) {
1068                         pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
1069                                iommu->seq_id);
1070                         drhd->ignored = 1;
1071                 }
1072         }
1073         if (!drhd->ignored) {
1074                 msagaw = iommu_calculate_max_sagaw(iommu);
1075                 if (msagaw < 0) {
1076                         pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n",
1077                                iommu->seq_id);
1078                         drhd->ignored = 1;
1079                         agaw = -1;
1080                 }
1081         }
1082         iommu->agaw = agaw;
1083         iommu->msagaw = msagaw;
1084         iommu->segment = drhd->segment;
1085
1086         iommu->node = NUMA_NO_NODE;
1087
1088         ver = readl(iommu->reg + DMAR_VER_REG);
1089         pr_info("%s: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
1090                 iommu->name,
1091                 (unsigned long long)drhd->reg_base_addr,
1092                 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
1093                 (unsigned long long)iommu->cap,
1094                 (unsigned long long)iommu->ecap);
1095
1096         /* Reflect status in gcmd */
1097         sts = readl(iommu->reg + DMAR_GSTS_REG);
1098         if (sts & DMA_GSTS_IRES)
1099                 iommu->gcmd |= DMA_GCMD_IRE;
1100         if (sts & DMA_GSTS_TES)
1101                 iommu->gcmd |= DMA_GCMD_TE;
1102         if (sts & DMA_GSTS_QIES)
1103                 iommu->gcmd |= DMA_GCMD_QIE;
1104
1105         raw_spin_lock_init(&iommu->register_lock);
1106
1107         /*
1108          * This is only for hotplug; at boot time intel_iommu_enabled won't
1109          * be set yet. When intel_iommu_init() runs, it registers the units
1110          * present at boot time, then sets intel_iommu_enabled.
1111          */
1112         if (intel_iommu_enabled && !drhd->ignored) {
1113                 err = iommu_device_sysfs_add(&iommu->iommu, NULL,
1114                                              intel_iommu_groups,
1115                                              "%s", iommu->name);
1116                 if (err)
1117                         goto err_unmap;
1118
1119                 err = iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL);
1120                 if (err)
1121                         goto err_sysfs;
1122         }
1123
1124         drhd->iommu = iommu;
1125         iommu->drhd = drhd;
1126
1127         return 0;
1128
1129 err_sysfs:
1130         iommu_device_sysfs_remove(&iommu->iommu);
1131 err_unmap:
1132         unmap_iommu(iommu);
1133 error_free_seq_id:
1134         ida_free(&dmar_seq_ids, iommu->seq_id);
1135 error:
1136         kfree(iommu);
1137         return err;
1138 }
1139
1140 static void free_iommu(struct intel_iommu *iommu)
1141 {
1142         if (intel_iommu_enabled && !iommu->drhd->ignored) {
1143                 iommu_device_unregister(&iommu->iommu);
1144                 iommu_device_sysfs_remove(&iommu->iommu);
1145         }
1146
1147         if (iommu->irq) {
1148                 if (iommu->pr_irq) {
1149                         free_irq(iommu->pr_irq, iommu);
1150                         dmar_free_hwirq(iommu->pr_irq);
1151                         iommu->pr_irq = 0;
1152                 }
1153                 free_irq(iommu->irq, iommu);
1154                 dmar_free_hwirq(iommu->irq);
1155                 iommu->irq = 0;
1156         }
1157
1158         if (iommu->qi) {
1159                 free_page((unsigned long)iommu->qi->desc);
1160                 kfree(iommu->qi->desc_status);
1161                 kfree(iommu->qi);
1162         }
1163
1164         if (iommu->reg)
1165                 unmap_iommu(iommu);
1166
1167         ida_free(&dmar_seq_ids, iommu->seq_id);
1168         kfree(iommu);
1169 }
1170
1171 /*
1172  * Reclaim all the submitted descriptors which have completed their work.
1173  */
1174 static inline void reclaim_free_desc(struct q_inval *qi)
1175 {
1176         while (qi->desc_status[qi->free_tail] == QI_DONE ||
1177                qi->desc_status[qi->free_tail] == QI_ABORT) {
1178                 qi->desc_status[qi->free_tail] = QI_FREE;
1179                 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
1180                 qi->free_cnt++;
1181         }
1182 }
1183
1184 static const char *qi_type_string(u8 type)
1185 {
1186         switch (type) {
1187         case QI_CC_TYPE:
1188                 return "Context-cache Invalidation";
1189         case QI_IOTLB_TYPE:
1190                 return "IOTLB Invalidation";
1191         case QI_DIOTLB_TYPE:
1192                 return "Device-TLB Invalidation";
1193         case QI_IEC_TYPE:
1194                 return "Interrupt Entry Cache Invalidation";
1195         case QI_IWD_TYPE:
1196                 return "Invalidation Wait";
1197         case QI_EIOTLB_TYPE:
1198                 return "PASID-based IOTLB Invalidation";
1199         case QI_PC_TYPE:
1200                 return "PASID-cache Invalidation";
1201         case QI_DEIOTLB_TYPE:
1202                 return "PASID-based Device-TLB Invalidation";
1203         case QI_PGRP_RESP_TYPE:
1204                 return "Page Group Response";
1205         default:
1206                 return "UNKNOWN";
1207         }
1208 }
1209
1210 static void qi_dump_fault(struct intel_iommu *iommu, u32 fault)
1211 {
1212         unsigned int head = dmar_readl(iommu->reg + DMAR_IQH_REG);
1213         u64 iqe_err = dmar_readq(iommu->reg + DMAR_IQER_REG);
1214         struct qi_desc *desc = iommu->qi->desc + head;
1215
1216         if (fault & DMA_FSTS_IQE)
1217                 pr_err("VT-d detected Invalidation Queue Error: Reason %llx",
1218                        DMAR_IQER_REG_IQEI(iqe_err));
1219         if (fault & DMA_FSTS_ITE)
1220                 pr_err("VT-d detected Invalidation Time-out Error: SID %llx",
1221                        DMAR_IQER_REG_ITESID(iqe_err));
1222         if (fault & DMA_FSTS_ICE)
1223                 pr_err("VT-d detected Invalidation Completion Error: SID %llx",
1224                        DMAR_IQER_REG_ICESID(iqe_err));
1225
1226         pr_err("QI HEAD: %s qw0 = 0x%llx, qw1 = 0x%llx\n",
1227                qi_type_string(desc->qw0 & 0xf),
1228                (unsigned long long)desc->qw0,
1229                (unsigned long long)desc->qw1);
1230
1231         head = ((head >> qi_shift(iommu)) + QI_LENGTH - 1) % QI_LENGTH;
1232         head <<= qi_shift(iommu);
1233         desc = iommu->qi->desc + head;
1234
1235         pr_err("QI PRIOR: %s qw0 = 0x%llx, qw1 = 0x%llx\n",
1236                qi_type_string(desc->qw0 & 0xf),
1237                (unsigned long long)desc->qw0,
1238                (unsigned long long)desc->qw1);
1239 }
1240
1241 static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
1242 {
1243         u32 fault;
1244         int head, tail;
1245         struct q_inval *qi = iommu->qi;
1246         int shift = qi_shift(iommu);
1247
1248         if (qi->desc_status[wait_index] == QI_ABORT)
1249                 return -EAGAIN;
1250
1251         fault = readl(iommu->reg + DMAR_FSTS_REG);
1252         if (fault & (DMA_FSTS_IQE | DMA_FSTS_ITE | DMA_FSTS_ICE))
1253                 qi_dump_fault(iommu, fault);
1254
1255         /*
1256          * If IQE happens, the head points to the descriptor associated
1257          * with the error. No new descriptors are fetched until the IQE
1258          * is cleared.
1259          */
1260         if (fault & DMA_FSTS_IQE) {
1261                 head = readl(iommu->reg + DMAR_IQH_REG);
1262                 if ((head >> shift) == index) {
1263                         struct qi_desc *desc = qi->desc + head;
1264
1265                         /*
1266                          * desc->qw2 and desc->qw3 are either reserved or
1267                          * used by software as private data. We won't print
1268                          * out these two qw's for security considerations.
1269                          */
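                        /*
                         * Overwrite the offending descriptor with the wait
                         * descriptor so the queue can make progress once the
                         * IQE is cleared; the caller sees -EINVAL for the
                         * dropped request.
                         */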
1270                         memcpy(desc, qi->desc + (wait_index << shift),
1271                                1 << shift);
1272                         writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
1273                         pr_info("Invalidation Queue Error (IQE) cleared\n");
1274                         return -EINVAL;
1275                 }
1276         }
1277
1278         /*
1279          * If ITE happens, all pending wait_desc commands are aborted.
1280          * No new descriptors are fetched until the ITE is cleared.
1281          */
1282         if (fault & DMA_FSTS_ITE) {
1283                 head = readl(iommu->reg + DMAR_IQH_REG);
1284                 head = ((head >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1285                 head |= 1;
1286                 tail = readl(iommu->reg + DMAR_IQT_REG);
1287                 tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1288
1289                 writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
1290                 pr_info("Invalidation Time-out Error (ITE) cleared\n");
1291
1292                 do {
1293                         if (qi->desc_status[head] == QI_IN_USE)
1294                                 qi->desc_status[head] = QI_ABORT;
1295                         head = (head - 2 + QI_LENGTH) % QI_LENGTH;
1296                 } while (head != tail);
1297
1298                 if (qi->desc_status[wait_index] == QI_ABORT)
1299                         return -EAGAIN;
1300         }
1301
1302         if (fault & DMA_FSTS_ICE) {
1303                 writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
1304                 pr_info("Invalidation Completion Error (ICE) cleared\n");
1305         }
1306
1307         return 0;
1308 }
1309
1310 /*
1311  * Function to submit invalidation descriptors of all types to the queued
1312  * invalidation interface (QI). Multiple descriptors can be submitted at a
1313  * time; a wait descriptor is appended to each submission to ensure that the
1314  * hardware has completed the invalidation before returning. Wait descriptors
1315  * may be part of the submission itself, but those are not polled for completion.
1316  */
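/*
 * A minimal caller is qi_global_iec() further below: it fills in a single
 * struct qi_desc and invokes qi_submit_sync(iommu, &desc, 1, 0).
 */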
1317 int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
1318                    unsigned int count, unsigned long options)
1319 {
1320         struct q_inval *qi = iommu->qi;
1321         s64 devtlb_start_ktime = 0;
1322         s64 iotlb_start_ktime = 0;
1323         s64 iec_start_ktime = 0;
1324         struct qi_desc wait_desc;
1325         int wait_index, index;
1326         unsigned long flags;
1327         int offset, shift;
1328         int rc, i;
1329         u64 type;
1330
1331         if (!qi)
1332                 return 0;
1333
1334         type = desc->qw0 & GENMASK_ULL(3, 0);
1335
1336         if ((type == QI_IOTLB_TYPE || type == QI_EIOTLB_TYPE) &&
1337             dmar_latency_enabled(iommu, DMAR_LATENCY_INV_IOTLB))
1338                 iotlb_start_ktime = ktime_to_ns(ktime_get());
1339
1340         if ((type == QI_DIOTLB_TYPE || type == QI_DEIOTLB_TYPE) &&
1341             dmar_latency_enabled(iommu, DMAR_LATENCY_INV_DEVTLB))
1342                 devtlb_start_ktime = ktime_to_ns(ktime_get());
1343
1344         if (type == QI_IEC_TYPE &&
1345             dmar_latency_enabled(iommu, DMAR_LATENCY_INV_IEC))
1346                 iec_start_ktime = ktime_to_ns(ktime_get());
1347
1348 restart:
1349         rc = 0;
1350
1351         raw_spin_lock_irqsave(&qi->q_lock, flags);
1352         /*
1353          * Check if we have enough empty slots in the queue to submit.
1354          * The calculation is based on:
1355          * # of desc + 1 wait desc + 1 space between head and tail
1356          */
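        /*
         * E.g. submitting a single descriptor (count == 1) needs at least
         * three free slots: the descriptor itself, the appended wait
         * descriptor, and one slot kept unused so a completely full ring
         * (head == tail) is never mistaken for an empty one.
         */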
1357         while (qi->free_cnt < count + 2) {
1358                 raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1359                 cpu_relax();
1360                 raw_spin_lock_irqsave(&qi->q_lock, flags);
1361         }
1362
1363         index = qi->free_head;
1364         wait_index = (index + count) % QI_LENGTH;
1365         shift = qi_shift(iommu);
1366
1367         for (i = 0; i < count; i++) {
1368                 offset = ((index + i) % QI_LENGTH) << shift;
1369                 memcpy(qi->desc + offset, &desc[i], 1 << shift);
1370                 qi->desc_status[(index + i) % QI_LENGTH] = QI_IN_USE;
1371                 trace_qi_submit(iommu, desc[i].qw0, desc[i].qw1,
1372                                 desc[i].qw2, desc[i].qw3);
1373         }
1374         qi->desc_status[wait_index] = QI_IN_USE;
1375
1376         wait_desc.qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
1377                         QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
1378         if (options & QI_OPT_WAIT_DRAIN)
1379                 wait_desc.qw0 |= QI_IWD_PRQ_DRAIN;
1380         wait_desc.qw1 = virt_to_phys(&qi->desc_status[wait_index]);
1381         wait_desc.qw2 = 0;
1382         wait_desc.qw3 = 0;
1383
1384         offset = wait_index << shift;
1385         memcpy(qi->desc + offset, &wait_desc, 1 << shift);
1386
1387         qi->free_head = (qi->free_head + count + 1) % QI_LENGTH;
1388         qi->free_cnt -= count + 1;
1389
1390         /*
1391          * Update the HW tail register to indicate the presence of
1392          * new descriptors.
1393          */
1394         writel(qi->free_head << shift, iommu->reg + DMAR_IQT_REG);
1395
1396         while (qi->desc_status[wait_index] != QI_DONE) {
1397                 /*
1398                  * We leave interrupts disabled to prevent interrupt context
1399                  * from queueing another command while one is already submitted
1400                  * and waiting for completion on this CPU. This avoids a
1401                  * deadlock where the interrupt context could wait indefinitely
1402                  * for free slots in the queue.
1403                  */
1404                 rc = qi_check_fault(iommu, index, wait_index);
1405                 if (rc)
1406                         break;
1407
1408                 raw_spin_unlock(&qi->q_lock);
1409                 cpu_relax();
1410                 raw_spin_lock(&qi->q_lock);
1411         }
1412
1413         for (i = 0; i < count; i++)
1414                 qi->desc_status[(index + i) % QI_LENGTH] = QI_DONE;
1415
1416         reclaim_free_desc(qi);
1417         raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1418
1419         if (rc == -EAGAIN)
1420                 goto restart;
1421
1422         if (iotlb_start_ktime)
1423                 dmar_latency_update(iommu, DMAR_LATENCY_INV_IOTLB,
1424                                 ktime_to_ns(ktime_get()) - iotlb_start_ktime);
1425
1426         if (devtlb_start_ktime)
1427                 dmar_latency_update(iommu, DMAR_LATENCY_INV_DEVTLB,
1428                                 ktime_to_ns(ktime_get()) - devtlb_start_ktime);
1429
1430         if (iec_start_ktime)
1431                 dmar_latency_update(iommu, DMAR_LATENCY_INV_IEC,
1432                                 ktime_to_ns(ktime_get()) - iec_start_ktime);
1433
1434         return rc;
1435 }
1436
1437 /*
1438  * Flush the global interrupt entry cache.
1439  */
1440 void qi_global_iec(struct intel_iommu *iommu)
1441 {
1442         struct qi_desc desc;
1443
1444         desc.qw0 = QI_IEC_TYPE;
1445         desc.qw1 = 0;
1446         desc.qw2 = 0;
1447         desc.qw3 = 0;
1448
1449         /* should never fail */
1450         qi_submit_sync(iommu, &desc, 1, 0);
1451 }
1452
1453 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
1454                       u64 type)
1455 {
1456         struct qi_desc desc;
1457
1458         desc.qw0 = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
1459                         | QI_CC_GRAN(type) | QI_CC_TYPE;
1460         desc.qw1 = 0;
1461         desc.qw2 = 0;
1462         desc.qw3 = 0;
1463
1464         qi_submit_sync(iommu, &desc, 1, 0);
1465 }
1466
1467 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
1468                     unsigned int size_order, u64 type)
1469 {
1470         u8 dw = 0, dr = 0;
1471
1472         struct qi_desc desc;
1473         int ih = 0;
1474
1475         if (cap_write_drain(iommu->cap))
1476                 dw = 1;
1477
1478         if (cap_read_drain(iommu->cap))
1479                 dr = 1;
1480
1481         desc.qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
1482                 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
1483         desc.qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
1484                 | QI_IOTLB_AM(size_order);
1485         desc.qw2 = 0;
1486         desc.qw3 = 0;
1487
1488         qi_submit_sync(iommu, &desc, 1, 0);
1489 }
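
/*
 * Illustrative usage sketch (editorial, not part of this file): callers
 * typically flush either a whole domain or an aligned power-of-two range of
 * pages. The DMA_TLB_DSI_FLUSH/DMA_TLB_PSI_FLUSH granularity values are
 * assumed here from the usual VT-d definitions; did, addr and order are
 * placeholders.
 *
 *	qi_flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
 *
 *	qi_flush_iotlb(iommu, did, addr, order, DMA_TLB_PSI_FLUSH);
 */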
1490
1491 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1492                         u16 qdep, u64 addr, unsigned mask)
1493 {
1494         struct qi_desc desc;
1495
1496         if (mask) {
1497                 addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
1498                 desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
1499         } else
1500                 desc.qw1 = QI_DEV_IOTLB_ADDR(addr);
1501
1502         if (qdep >= QI_DEV_IOTLB_MAX_INVS)
1503                 qdep = 0;
1504
1505         desc.qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
1506                    QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid);
1507         desc.qw2 = 0;
1508         desc.qw3 = 0;
1509
1510         qi_submit_sync(iommu, &desc, 1, 0);
1511 }
1512
1513 /* PASID-based IOTLB invalidation */
1514 void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
1515                      unsigned long npages, bool ih)
1516 {
1517         struct qi_desc desc = {.qw2 = 0, .qw3 = 0};
1518
1519         /*
1520          * npages == -1 means a PASID-selective invalidation; a positive
1521          * value means a page-selective-within-PASID invalidation.
1522          * 0 is not a valid input.
1523          */
1524         if (WARN_ON(!npages)) {
1525                 pr_err("Invalid input npages = %ld\n", npages);
1526                 return;
1527         }
1528
1529         if (npages == -1) {
1530                 desc.qw0 = QI_EIOTLB_PASID(pasid) |
1531                                 QI_EIOTLB_DID(did) |
1532                                 QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
1533                                 QI_EIOTLB_TYPE;
1534                 desc.qw1 = 0;
1535         } else {
1536                 int mask = ilog2(__roundup_pow_of_two(npages));
1537                 unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask));
1538
1539                 if (WARN_ON_ONCE(!IS_ALIGNED(addr, align)))
1540                         addr = ALIGN_DOWN(addr, align);
1541
1542                 desc.qw0 = QI_EIOTLB_PASID(pasid) |
1543                                 QI_EIOTLB_DID(did) |
1544                                 QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
1545                                 QI_EIOTLB_TYPE;
1546                 desc.qw1 = QI_EIOTLB_ADDR(addr) |
1547                                 QI_EIOTLB_IH(ih) |
1548                                 QI_EIOTLB_AM(mask);
1549         }
1550
1551         qi_submit_sync(iommu, &desc, 1, 0);
1552 }
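
/*
 * Illustrative usage sketch (editorial): npages selects the granularity as
 * described in the comment above; did, pasid and addr are placeholders.
 *
 *	PASID-selective invalidation (addr is ignored):
 *		qi_flush_piotlb(iommu, did, pasid, 0, -1, false);
 *
 *	Page-selective within the PASID, 8 pages at a 32KB-aligned addr:
 *		qi_flush_piotlb(iommu, did, pasid, addr, 8, false);
 */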
1553
1554 /* PASID-based device IOTLB Invalidate */
1555 void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1556                               u32 pasid,  u16 qdep, u64 addr, unsigned int size_order)
1557 {
1558         unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1);
1559         struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
1560
1561         desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) |
1562                 QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE |
1563                 QI_DEV_IOTLB_PFSID(pfsid);
1564
1565         /*
1566          * If the S bit is 0, only a single page is flushed. If the S bit is
1567          * set, the position of the least significant zero bit in the address
1568          * field encodes the invalidation range (VT-d spec 6.5.2.6), e.g. a
1569          * zero at bit 12 indicates 8KB and a zero at bit 13 indicates 16KB.
1570          * size_order == 0 means a single 4KB page (PAGE_SIZE).
1571          * Max Invs Pending (MIP) is set to 0 for now until we have DIT in
1572          * ECAP.
1573          */
1574         if (!IS_ALIGNED(addr, VTD_PAGE_SIZE << size_order))
1575                 pr_warn_ratelimited("Invalidate non-aligned address %llx, order %d\n",
1576                                     addr, size_order);
1577
1578         /* Take page address */
1579         desc.qw1 = QI_DEV_EIOTLB_ADDR(addr);
1580
1581         if (size_order) {
1582                 /*
1583                  * Any 0 bits in the address below size_order would otherwise
1584                  * be read as the least significant zero bit and shrink the
1585                  * invalidation range, so set them all to 1 first.
1586                  */
1587                 desc.qw1 |= GENMASK_ULL(size_order + VTD_PAGE_SHIFT - 1,
1588                                         VTD_PAGE_SHIFT);
1589                 /* Clear size_order bit to indicate size */
1590                 desc.qw1 &= ~mask;
1591                 /* Set the S bit to indicate flushing more than 1 page */
1592                 desc.qw1 |= QI_DEV_EIOTLB_SIZE;
1593         }
1594
1595         qi_submit_sync(iommu, &desc, 1, 0);
1596 }
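
/*
 * Worked example (editorial) of the size encoding above, assuming 4KB pages:
 * with addr = 0x40000 and size_order = 2 (four pages, 16KB),
 *
 *	QI_DEV_EIOTLB_ADDR(addr)	-> address field 0x40000
 *	|= GENMASK_ULL(13, 12)		-> 0x43000
 *	&= ~(1UL << 13)			-> 0x41000
 *
 * leaving bit 13 as the least significant zero, which together with the S
 * bit encodes a 16KB invalidation range starting at 0x40000.
 */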
1597
1598 void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did,
1599                           u64 granu, u32 pasid)
1600 {
1601         struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
1602
1603         desc.qw0 = QI_PC_PASID(pasid) | QI_PC_DID(did) |
1604                         QI_PC_GRAN(granu) | QI_PC_TYPE;
1605         qi_submit_sync(iommu, &desc, 1, 0);
1606 }
1607
1608 /*
1609  * Disable Queued Invalidation interface.
1610  */
1611 void dmar_disable_qi(struct intel_iommu *iommu)
1612 {
1613         unsigned long flags;
1614         u32 sts;
1615         cycles_t start_time = get_cycles();
1616
1617         if (!ecap_qis(iommu->ecap))
1618                 return;
1619
1620         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1621
1622         sts =  readl(iommu->reg + DMAR_GSTS_REG);
1623         if (!(sts & DMA_GSTS_QIES))
1624                 goto end;
1625
1626         /*
1627          * Give the hardware a chance to complete pending invalidation requests.
1628          */
1629         while ((readl(iommu->reg + DMAR_IQT_REG) !=
1630                 readl(iommu->reg + DMAR_IQH_REG)) &&
1631                 (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
1632                 cpu_relax();
1633
1634         iommu->gcmd &= ~DMA_GCMD_QIE;
1635         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1636
1637         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
1638                       !(sts & DMA_GSTS_QIES), sts);
1639 end:
1640         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1641 }
1642
1643 /*
1644  * Enable queued invalidation.
1645  */
1646 static void __dmar_enable_qi(struct intel_iommu *iommu)
1647 {
1648         u32 sts;
1649         unsigned long flags;
1650         struct q_inval *qi = iommu->qi;
1651         u64 val = virt_to_phys(qi->desc);
1652
1653         qi->free_head = qi->free_tail = 0;
1654         qi->free_cnt = QI_LENGTH;
1655
1656         /*
1657          * Set DW=1 and QS=1 in IQA_REG when Scalable Mode capability
1658          * is present.
1659          */
1660         if (ecap_smts(iommu->ecap))
1661                 val |= (1 << 11) | 1;
1662
1663         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1664
1665         /* write zero to the tail reg */
1666         writel(0, iommu->reg + DMAR_IQT_REG);
1667
1668         dmar_writeq(iommu->reg + DMAR_IQA_REG, val);
1669
1670         iommu->gcmd |= DMA_GCMD_QIE;
1671         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1672
1673         /* Make sure hardware completes it */
1674         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
1675
1676         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1677 }
1678
1679 /*
1680  * Enable Queued Invalidation interface. This is a must to support
1681  * interrupt-remapping. Also used by DMA-remapping, which replaces
1682  * register based IOTLB invalidation.
1683  */
1684 int dmar_enable_qi(struct intel_iommu *iommu)
1685 {
1686         struct q_inval *qi;
1687         struct page *desc_page;
1688
1689         if (!ecap_qis(iommu->ecap))
1690                 return -ENOENT;
1691
1692         /*
1693          * Queued invalidation is already set up and enabled.
1694          */
1695         if (iommu->qi)
1696                 return 0;
1697
1698         iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1699         if (!iommu->qi)
1700                 return -ENOMEM;
1701
1702         qi = iommu->qi;
1703
1704         /*
1705          * Need two pages to accommodate 256 descriptors of 256 bits each
1706          * if the remapping hardware supports scalable mode translation.
1707          */
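        /*
         * Editorial note on the arithmetic: QI_LENGTH is 256 slots, so the
         * queue occupies 256 * 32 bytes = 8KB (two pages, order 1) with
         * 256-bit scalable-mode descriptors and 256 * 16 bytes = 4KB (one
         * page, order 0) with 128-bit legacy descriptors, hence the
         * !!ecap_smts() page order below.
         */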
1708         desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
1709                                      !!ecap_smts(iommu->ecap));
1710         if (!desc_page) {
1711                 kfree(qi);
1712                 iommu->qi = NULL;
1713                 return -ENOMEM;
1714         }
1715
1716         qi->desc = page_address(desc_page);
1717
1718         qi->desc_status = kcalloc(QI_LENGTH, sizeof(int), GFP_ATOMIC);
1719         if (!qi->desc_status) {
1720                 free_page((unsigned long) qi->desc);
1721                 kfree(qi);
1722                 iommu->qi = NULL;
1723                 return -ENOMEM;
1724         }
1725
1726         raw_spin_lock_init(&qi->q_lock);
1727
1728         __dmar_enable_qi(iommu);
1729
1730         return 0;
1731 }
1732
1733 /* IOMMU interrupt handling. Most of it is MSI-like. */
1734
1735 enum faulttype {
1736         DMA_REMAP,
1737         INTR_REMAP,
1738         UNKNOWN,
1739 };
1740
1741 static const char *dma_remap_fault_reasons[] =
1742 {
1743         "Software",
1744         "Present bit in root entry is clear",
1745         "Present bit in context entry is clear",
1746         "Invalid context entry",
1747         "Access beyond MGAW",
1748         "PTE Write access is not set",
1749         "PTE Read access is not set",
1750         "Next page table ptr is invalid",
1751         "Root table address invalid",
1752         "Context table ptr is invalid",
1753         "non-zero reserved fields in RTP",
1754         "non-zero reserved fields in CTP",
1755         "non-zero reserved fields in PTE",
1756         "PCE for translation request specifies blocking",
1757 };
1758
1759 static const char * const dma_remap_sm_fault_reasons[] = {
1760         "SM: Invalid Root Table Address",
1761         "SM: TTM 0 for request with PASID",
1762         "SM: TTM 0 for page group request",
1763         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x33-0x37 */
1764         "SM: Error attempting to access Root Entry",
1765         "SM: Present bit in Root Entry is clear",
1766         "SM: Non-zero reserved field set in Root Entry",
1767         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x3B-0x3F */
1768         "SM: Error attempting to access Context Entry",
1769         "SM: Present bit in Context Entry is clear",
1770         "SM: Non-zero reserved field set in the Context Entry",
1771         "SM: Invalid Context Entry",
1772         "SM: DTE field in Context Entry is clear",
1773         "SM: PASID Enable field in Context Entry is clear",
1774         "SM: PASID is larger than the max in Context Entry",
1775         "SM: PRE field in Context-Entry is clear",
1776         "SM: RID_PASID field error in Context-Entry",
1777         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x49-0x4F */
1778         "SM: Error attempting to access the PASID Directory Entry",
1779         "SM: Present bit in Directory Entry is clear",
1780         "SM: Non-zero reserved field set in PASID Directory Entry",
1781         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x53-0x57 */
1782         "SM: Error attempting to access PASID Table Entry",
1783         "SM: Present bit in PASID Table Entry is clear",
1784         "SM: Non-zero reserved field set in PASID Table Entry",
1785         "SM: Invalid Scalable-Mode PASID Table Entry",
1786         "SM: ERE field is clear in PASID Table Entry",
1787         "SM: SRE field is clear in PASID Table Entry",
1788         "Unknown", "Unknown",/* 0x5E-0x5F */
1789         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x60-0x67 */
1790         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x68-0x6F */
1791         "SM: Error attempting to access first-level paging entry",
1792         "SM: Present bit in first-level paging entry is clear",
1793         "SM: Non-zero reserved field set in first-level paging entry",
1794         "SM: Error attempting to access FL-PML4 entry",
1795         "SM: First-level entry address beyond MGAW in Nested translation",
1796         "SM: Read permission error in FL-PML4 entry in Nested translation",
1797         "SM: Read permission error in first-level paging entry in Nested translation",
1798         "SM: Write permission error in first-level paging entry in Nested translation",
1799         "SM: Error attempting to access second-level paging entry",
1800         "SM: Read/Write permission error in second-level paging entry",
1801         "SM: Non-zero reserved field set in second-level paging entry",
1802         "SM: Invalid second-level page table pointer",
1803         "SM: A/D bit update needed in second-level entry when set up in no snoop",
1804         "Unknown", "Unknown", "Unknown", /* 0x7D-0x7F */
1805         "SM: Address in first-level translation is not canonical",
1806         "SM: U/S set 0 for first-level translation with user privilege",
1807         "SM: No execute permission for request with PASID and ER=1",
1808         "SM: Address beyond the DMA hardware max",
1809         "SM: Second-level entry address beyond the max",
1810         "SM: No write permission for Write/AtomicOp request",
1811         "SM: No read permission for Read/AtomicOp request",
1812         "SM: Invalid address-interrupt address",
1813         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x88-0x8F */
1814         "SM: A/D bit update needed in first-level entry when set up in no snoop",
1815 };
1816
1817 static const char *irq_remap_fault_reasons[] =
1818 {
1819         "Detected reserved fields in the decoded interrupt-remapped request",
1820         "Interrupt index exceeded the interrupt-remapping table size",
1821         "Present field in the IRTE entry is clear",
1822         "Error accessing interrupt-remapping table pointed by IRTA_REG",
1823         "Detected reserved fields in the IRTE entry",
1824         "Blocked a compatibility format interrupt request",
1825         "Blocked an interrupt request due to source-id verification failure",
1826 };
1827
1828 static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1829 {
1830         if (fault_reason >= 0x20 && (fault_reason - 0x20 <
1831                                         ARRAY_SIZE(irq_remap_fault_reasons))) {
1832                 *fault_type = INTR_REMAP;
1833                 return irq_remap_fault_reasons[fault_reason - 0x20];
1834         } else if (fault_reason >= 0x30 && (fault_reason - 0x30 <
1835                         ARRAY_SIZE(dma_remap_sm_fault_reasons))) {
1836                 *fault_type = DMA_REMAP;
1837                 return dma_remap_sm_fault_reasons[fault_reason - 0x30];
1838         } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1839                 *fault_type = DMA_REMAP;
1840                 return dma_remap_fault_reasons[fault_reason];
1841         } else {
1842                 *fault_type = UNKNOWN;
1843                 return "Unknown";
1844         }
1845 }
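
/*
 * Editorial example: a fault_reason of 0x25 falls in the interrupt-remapping
 * range, so dmar_get_fault_reason() returns irq_remap_fault_reasons[5]
 * ("Blocked a compatibility format interrupt request") and sets *fault_type
 * to INTR_REMAP; reasons 0x30 and above index dma_remap_sm_fault_reasons[]
 * the same way.
 */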
1846
1847
1848 static inline int dmar_msi_reg(struct intel_iommu *iommu, int irq)
1849 {
1850         if (iommu->irq == irq)
1851                 return DMAR_FECTL_REG;
1852         else if (iommu->pr_irq == irq)
1853                 return DMAR_PECTL_REG;
1854         else
1855                 BUG();
1856 }
1857
1858 void dmar_msi_unmask(struct irq_data *data)
1859 {
1860         struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1861         int reg = dmar_msi_reg(iommu, data->irq);
1862         unsigned long flag;
1863
1864         /* unmask it */
1865         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1866         writel(0, iommu->reg + reg);
1867         /* Read back the register to flush the posted write */
1868         readl(iommu->reg + reg);
1869         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1870 }
1871
1872 void dmar_msi_mask(struct irq_data *data)
1873 {
1874         struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1875         int reg = dmar_msi_reg(iommu, data->irq);
1876         unsigned long flag;
1877
1878         /* mask it */
1879         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1880         writel(DMA_FECTL_IM, iommu->reg + reg);
1881         /* Read back the register to flush the posted write */
1882         readl(iommu->reg + reg);
1883         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1884 }
1885
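/*
 * Editorial note: dmar_msi_write()/dmar_msi_read() below rely on the VT-d
 * register layout in which the data, address and upper-address registers
 * follow the control register returned by dmar_msi_reg() at offsets +4, +8
 * and +12 (e.g. FEDATA, FEADDR and FEUADDR after DMAR_FECTL_REG for the
 * fault event interrupt).
 */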
1886 void dmar_msi_write(int irq, struct msi_msg *msg)
1887 {
1888         struct intel_iommu *iommu = irq_get_handler_data(irq);
1889         int reg = dmar_msi_reg(iommu, irq);
1890         unsigned long flag;
1891
1892         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1893         writel(msg->data, iommu->reg + reg + 4);
1894         writel(msg->address_lo, iommu->reg + reg + 8);
1895         writel(msg->address_hi, iommu->reg + reg + 12);
1896         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1897 }
1898
1899 void dmar_msi_read(int irq, struct msi_msg *msg)
1900 {
1901         struct intel_iommu *iommu = irq_get_handler_data(irq);
1902         int reg = dmar_msi_reg(iommu, irq);
1903         unsigned long flag;
1904
1905         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1906         msg->data = readl(iommu->reg + reg + 4);
1907         msg->address_lo = readl(iommu->reg + reg + 8);
1908         msg->address_hi = readl(iommu->reg + reg + 12);
1909         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1910 }
1911
1912 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1913                 u8 fault_reason, u32 pasid, u16 source_id,
1914                 unsigned long long addr)
1915 {
1916         const char *reason;
1917         int fault_type;
1918
1919         reason = dmar_get_fault_reason(fault_reason, &fault_type);
1920
1921         if (fault_type == INTR_REMAP) {
1922                 pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index 0x%llx [fault reason 0x%02x] %s\n",
1923                        source_id >> 8, PCI_SLOT(source_id & 0xFF),
1924                        PCI_FUNC(source_id & 0xFF), addr >> 48,
1925                        fault_reason, reason);
1926
1927                 return 0;
1928         }
1929
1930         if (pasid == INVALID_IOASID)
1931                 pr_err("[%s NO_PASID] Request device [%02x:%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n",
1932                        type ? "DMA Read" : "DMA Write",
1933                        source_id >> 8, PCI_SLOT(source_id & 0xFF),
1934                        PCI_FUNC(source_id & 0xFF), addr,
1935                        fault_reason, reason);
1936         else
1937                 pr_err("[%s PASID 0x%x] Request device [%02x:%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n",
1938                        type ? "DMA Read" : "DMA Write", pasid,
1939                        source_id >> 8, PCI_SLOT(source_id & 0xFF),
1940                        PCI_FUNC(source_id & 0xFF), addr,
1941                        fault_reason, reason);
1942
1943         dmar_fault_dump_ptes(iommu, source_id, addr, pasid);
1944
1945         return 0;
1946 }
1947
1948 #define PRIMARY_FAULT_REG_LEN (16)
1949 irqreturn_t dmar_fault(int irq, void *dev_id)
1950 {
1951         struct intel_iommu *iommu = dev_id;
1952         int reg, fault_index;
1953         u32 fault_status;
1954         unsigned long flag;
1955         static DEFINE_RATELIMIT_STATE(rs,
1956                                       DEFAULT_RATELIMIT_INTERVAL,
1957                                       DEFAULT_RATELIMIT_BURST);
1958
1959         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1960         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1961         if (fault_status && __ratelimit(&rs))
1962                 pr_err("DRHD: handling fault status reg %x\n", fault_status);
1963
1964         /* TBD: ignore advanced fault log currently */
1965         if (!(fault_status & DMA_FSTS_PPF))
1966                 goto unlock_exit;
1967
1968         fault_index = dma_fsts_fault_record_index(fault_status);
1969         reg = cap_fault_reg_offset(iommu->cap);
1970         while (1) {
1971                 /* Disable printing, simply clear the fault when ratelimited */
1972                 bool ratelimited = !__ratelimit(&rs);
1973                 u8 fault_reason;
1974                 u16 source_id;
1975                 u64 guest_addr;
1976                 u32 pasid;
1977                 int type;
1978                 u32 data;
1979                 bool pasid_present;
1980
1981                 /* highest 32 bits */
1982                 data = readl(iommu->reg + reg +
1983                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1984                 if (!(data & DMA_FRCD_F))
1985                         break;
1986
1987                 if (!ratelimited) {
1988                         fault_reason = dma_frcd_fault_reason(data);
1989                         type = dma_frcd_type(data);
1990
1991                         pasid = dma_frcd_pasid_value(data);
1992                         data = readl(iommu->reg + reg +
1993                                      fault_index * PRIMARY_FAULT_REG_LEN + 8);
1994                         source_id = dma_frcd_source_id(data);
1995
1996                         pasid_present = dma_frcd_pasid_present(data);
1997                         guest_addr = dmar_readq(iommu->reg + reg +
1998                                         fault_index * PRIMARY_FAULT_REG_LEN);
1999                         guest_addr = dma_frcd_page_addr(guest_addr);
2000                 }
2001
2002                 /* clear the fault */
2003                 writel(DMA_FRCD_F, iommu->reg + reg +
2004                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
2005
2006                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
2007
2008                 if (!ratelimited)
2009                         /* Use INVALID_IOASID (-1) when no PASID is present */
2010                         dmar_fault_do_one(iommu, type, fault_reason,
2011                                           pasid_present ? pasid : INVALID_IOASID,
2012                                           source_id, guest_addr);
2013
2014                 fault_index++;
2015                 if (fault_index >= cap_num_fault_regs(iommu->cap))
2016                         fault_index = 0;
2017                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
2018         }
2019
2020         writel(DMA_FSTS_PFO | DMA_FSTS_PPF | DMA_FSTS_PRO,
2021                iommu->reg + DMAR_FSTS_REG);
2022
2023 unlock_exit:
2024         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
2025         return IRQ_HANDLED;
2026 }
2027
2028 int dmar_set_interrupt(struct intel_iommu *iommu)
2029 {
2030         int irq, ret;
2031
2032         /*
2033          * Check if the fault interrupt is already initialized.
2034          */
2035         if (iommu->irq)
2036                 return 0;
2037
2038         irq = dmar_alloc_hwirq(iommu->seq_id, iommu->node, iommu);
2039         if (irq > 0) {
2040                 iommu->irq = irq;
2041         } else {
2042                 pr_err("No free IRQ vectors\n");
2043                 return -EINVAL;
2044         }
2045
2046         ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
2047         if (ret)
2048                 pr_err("Can't request irq\n");
2049         return ret;
2050 }
2051
2052 int __init enable_drhd_fault_handling(void)
2053 {
2054         struct dmar_drhd_unit *drhd;
2055         struct intel_iommu *iommu;
2056
2057         /*
2058          * Enable fault control interrupt.
2059          */
2060         for_each_iommu(iommu, drhd) {
2061                 u32 fault_status;
2062                 int ret = dmar_set_interrupt(iommu);
2063
2064                 if (ret) {
2065                         pr_err("DRHD %Lx: failed to enable fault interrupt, ret %d\n",
2066                                (unsigned long long)drhd->reg_base_addr, ret);
2067                         return -1;
2068                 }
2069
2070                 /*
2071                  * Clear any previous faults.
2072                  */
2073                 dmar_fault(iommu->irq, iommu);
2074                 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
2075                 writel(fault_status, iommu->reg + DMAR_FSTS_REG);
2076         }
2077
2078         return 0;
2079 }
2080
2081 /*
2082  * Re-enable Queued Invalidation interface.
2083  */
2084 int dmar_reenable_qi(struct intel_iommu *iommu)
2085 {
2086         if (!ecap_qis(iommu->ecap))
2087                 return -ENOENT;
2088
2089         if (!iommu->qi)
2090                 return -ENOENT;
2091
2092         /*
2093          * First disable queued invalidation.
2094          */
2095         dmar_disable_qi(iommu);
2096         /*
2097          * Then enable queued invalidation again. Since there are no pending
2098          * invalidation requests now, it's safe to re-enable queued
2099          * invalidation.
2100          */
2101         __dmar_enable_qi(iommu);
2102
2103         return 0;
2104 }
2105
2106 /*
2107  * Check interrupt remapping support in DMAR table description.
2108  */
2109 int __init dmar_ir_support(void)
2110 {
2111         struct acpi_table_dmar *dmar;
2112         dmar = (struct acpi_table_dmar *)dmar_tbl;
2113         if (!dmar)
2114                 return 0;
2115         return dmar->flags & 0x1;
2116 }
2117
2118 /* Check whether DMAR units are in use */
2119 static inline bool dmar_in_use(void)
2120 {
2121         return irq_remapping_enabled || intel_iommu_enabled;
2122 }
2123
2124 static int __init dmar_free_unused_resources(void)
2125 {
2126         struct dmar_drhd_unit *dmaru, *dmaru_n;
2127
2128         if (dmar_in_use())
2129                 return 0;
2130
2131         if (dmar_dev_scope_status != 1 && !list_empty(&dmar_drhd_units))
2132                 bus_unregister_notifier(&pci_bus_type, &dmar_pci_bus_nb);
2133
2134         down_write(&dmar_global_lock);
2135         list_for_each_entry_safe(dmaru, dmaru_n, &dmar_drhd_units, list) {
2136                 list_del(&dmaru->list);
2137                 dmar_free_drhd(dmaru);
2138         }
2139         up_write(&dmar_global_lock);
2140
2141         return 0;
2142 }
2143
2144 late_initcall(dmar_free_unused_resources);
2145
2146 /*
2147  * DMAR Hotplug Support
2148  * For more details, please refer to Intel(R) Virtualization Technology
2149  * for Directed-IO Architecture Specification, Rev 2.2, Section 8.8
2150  * "Remapping Hardware Unit Hot Plug".
2151  */
2152 static guid_t dmar_hp_guid =
2153         GUID_INIT(0xD8C1A3A6, 0xBE9B, 0x4C9B,
2154                   0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF);
2155
2156 /*
2157  * Currently there's only one revision and BIOS will not check the revision id,
2158  * so use 0 for safety.
2159  */
2160 #define DMAR_DSM_REV_ID                 0
2161 #define DMAR_DSM_FUNC_DRHD              1
2162 #define DMAR_DSM_FUNC_ATSR              2
2163 #define DMAR_DSM_FUNC_RHSA              3
2164 #define DMAR_DSM_FUNC_SATC              4
2165
2166 static inline bool dmar_detect_dsm(acpi_handle handle, int func)
2167 {
2168         return acpi_check_dsm(handle, &dmar_hp_guid, DMAR_DSM_REV_ID, 1 << func);
2169 }
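
/*
 * Editorial note: acpi_check_dsm() takes a bitmask of _DSM function indices,
 * so passing 1 << func above probes for exactly one DMAR_DSM_FUNC_* at a
 * time.
 */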
2170
2171 static int dmar_walk_dsm_resource(acpi_handle handle, int func,
2172                                   dmar_res_handler_t handler, void *arg)
2173 {
2174         int ret = -ENODEV;
2175         union acpi_object *obj;
2176         struct acpi_dmar_header *start;
2177         struct dmar_res_callback callback;
2178         static int res_type[] = {
2179                 [DMAR_DSM_FUNC_DRHD] = ACPI_DMAR_TYPE_HARDWARE_UNIT,
2180                 [DMAR_DSM_FUNC_ATSR] = ACPI_DMAR_TYPE_ROOT_ATS,
2181                 [DMAR_DSM_FUNC_RHSA] = ACPI_DMAR_TYPE_HARDWARE_AFFINITY,
2182                 [DMAR_DSM_FUNC_SATC] = ACPI_DMAR_TYPE_SATC,
2183         };
2184
2185         if (!dmar_detect_dsm(handle, func))
2186                 return 0;
2187
2188         obj = acpi_evaluate_dsm_typed(handle, &dmar_hp_guid, DMAR_DSM_REV_ID,
2189                                       func, NULL, ACPI_TYPE_BUFFER);
2190         if (!obj)
2191                 return -ENODEV;
2192
2193         memset(&callback, 0, sizeof(callback));
2194         callback.cb[res_type[func]] = handler;
2195         callback.arg[res_type[func]] = arg;
2196         start = (struct acpi_dmar_header *)obj->buffer.pointer;
2197         ret = dmar_walk_remapping_entries(start, obj->buffer.length, &callback);
2198
2199         ACPI_FREE(obj);
2200
2201         return ret;
2202 }
2203
2204 static int dmar_hp_add_drhd(struct acpi_dmar_header *header, void *arg)
2205 {
2206         int ret;
2207         struct dmar_drhd_unit *dmaru;
2208
2209         dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2210         if (!dmaru)
2211                 return -ENODEV;
2212
2213         ret = dmar_ir_hotplug(dmaru, true);
2214         if (ret == 0)
2215                 ret = dmar_iommu_hotplug(dmaru, true);
2216
2217         return ret;
2218 }
2219
2220 static int dmar_hp_remove_drhd(struct acpi_dmar_header *header, void *arg)
2221 {
2222         int i, ret;
2223         struct device *dev;
2224         struct dmar_drhd_unit *dmaru;
2225
2226         dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2227         if (!dmaru)
2228                 return 0;
2229
2230         /*
2231          * All PCI devices managed by this unit should have been destroyed.
2232          */
2233         if (!dmaru->include_all && dmaru->devices && dmaru->devices_cnt) {
2234                 for_each_active_dev_scope(dmaru->devices,
2235                                           dmaru->devices_cnt, i, dev)
2236                         return -EBUSY;
2237         }
2238
2239         ret = dmar_ir_hotplug(dmaru, false);
2240         if (ret == 0)
2241                 ret = dmar_iommu_hotplug(dmaru, false);
2242
2243         return ret;
2244 }
2245
2246 static int dmar_hp_release_drhd(struct acpi_dmar_header *header, void *arg)
2247 {
2248         struct dmar_drhd_unit *dmaru;
2249
2250         dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2251         if (dmaru) {
2252                 list_del_rcu(&dmaru->list);
2253                 synchronize_rcu();
2254                 dmar_free_drhd(dmaru);
2255         }
2256
2257         return 0;
2258 }
2259
2260 static int dmar_hotplug_insert(acpi_handle handle)
2261 {
2262         int ret;
2263         int drhd_count = 0;
2264
2265         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2266                                      &dmar_validate_one_drhd, (void *)1);
2267         if (ret)
2268                 goto out;
2269
2270         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2271                                      &dmar_parse_one_drhd, (void *)&drhd_count);
2272         if (ret == 0 && drhd_count == 0) {
2273                 pr_warn(FW_BUG "No DRHD structures in buffer returned by _DSM method\n");
2274                 goto out;
2275         } else if (ret) {
2276                 goto release_drhd;
2277         }
2278
2279         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_RHSA,
2280                                      &dmar_parse_one_rhsa, NULL);
2281         if (ret)
2282                 goto release_drhd;
2283
2284         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2285                                      &dmar_parse_one_atsr, NULL);
2286         if (ret)
2287                 goto release_atsr;
2288
2289         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2290                                      &dmar_hp_add_drhd, NULL);
2291         if (!ret)
2292                 return 0;
2293
2294         dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2295                                &dmar_hp_remove_drhd, NULL);
2296 release_atsr:
2297         dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2298                                &dmar_release_one_atsr, NULL);
2299 release_drhd:
2300         dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2301                                &dmar_hp_release_drhd, NULL);
2302 out:
2303         return ret;
2304 }
2305
2306 static int dmar_hotplug_remove(acpi_handle handle)
2307 {
2308         int ret;
2309
2310         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2311                                      &dmar_check_one_atsr, NULL);
2312         if (ret)
2313                 return ret;
2314
2315         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2316                                      &dmar_hp_remove_drhd, NULL);
2317         if (ret == 0) {
2318                 WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2319                                                &dmar_release_one_atsr, NULL));
2320                 WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2321                                                &dmar_hp_release_drhd, NULL));
2322         } else {
2323                 dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2324                                        &dmar_hp_add_drhd, NULL);
2325         }
2326
2327         return ret;
2328 }
2329
2330 static acpi_status dmar_get_dsm_handle(acpi_handle handle, u32 lvl,
2331                                        void *context, void **retval)
2332 {
2333         acpi_handle *phdl = retval;
2334
2335         if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2336                 *phdl = handle;
2337                 return AE_CTRL_TERMINATE;
2338         }
2339
2340         return AE_OK;
2341 }
2342
2343 static int dmar_device_hotplug(acpi_handle handle, bool insert)
2344 {
2345         int ret;
2346         acpi_handle tmp = NULL;
2347         acpi_status status;
2348
2349         if (!dmar_in_use())
2350                 return 0;
2351
2352         if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2353                 tmp = handle;
2354         } else {
2355                 status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle,
2356                                              ACPI_UINT32_MAX,
2357                                              dmar_get_dsm_handle,
2358                                              NULL, NULL, &tmp);
2359                 if (ACPI_FAILURE(status)) {
2360                         pr_warn("Failed to locate _DSM method.\n");
2361                         return -ENXIO;
2362                 }
2363         }
2364         if (tmp == NULL)
2365                 return 0;
2366
2367         down_write(&dmar_global_lock);
2368         if (insert)
2369                 ret = dmar_hotplug_insert(tmp);
2370         else
2371                 ret = dmar_hotplug_remove(tmp);
2372         up_write(&dmar_global_lock);
2373
2374         return ret;
2375 }
2376
2377 int dmar_device_add(acpi_handle handle)
2378 {
2379         return dmar_device_hotplug(handle, true);
2380 }
2381
2382 int dmar_device_remove(acpi_handle handle)
2383 {
2384         return dmar_device_hotplug(handle, false);
2385 }
2386
2387 /*
2388  * dmar_platform_optin - Is %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in DMAR table
2389  *
2390  * Returns true if the platform has %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in
2391  * the ACPI DMAR table. This means that the platform boot firmware has made
2392  * sure no device can issue DMA outside of RMRR regions.
2393  */
2394 bool dmar_platform_optin(void)
2395 {
2396         struct acpi_table_dmar *dmar;
2397         acpi_status status;
2398         bool ret;
2399
2400         status = acpi_get_table(ACPI_SIG_DMAR, 0,
2401                                 (struct acpi_table_header **)&dmar);
2402         if (ACPI_FAILURE(status))
2403                 return false;
2404
2405         ret = !!(dmar->flags & DMAR_PLATFORM_OPT_IN);
2406         acpi_put_table((struct acpi_table_header *)dmar);
2407
2408         return ret;
2409 }
2410 EXPORT_SYMBOL_GPL(dmar_platform_optin);
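
/*
 * Illustrative sketch (editorial, not part of the driver): an external
 * consumer might key security-relevant defaults off the opt-in flag;
 * tighten_dma_protection() is a hypothetical helper.
 *
 *	if (dmar_platform_optin())
 *		tighten_dma_protection();
 */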