drivers/iommu/intel/dmar.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2006, Intel Corporation.
4  *
5  * Copyright (C) 2006-2008 Intel Corporation
6  * Author: Ashok Raj <ashok.raj@intel.com>
7  * Author: Shaohua Li <shaohua.li@intel.com>
8  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
9  *
10  * This file implements early detection/parsing of Remapping Devices
11  * reported to the OS through the BIOS via DMA Remapping Reporting (DMAR)
12  * ACPI tables.
13  *
14  * These routines are used by both DMA-remapping and Interrupt-remapping
15  */
16
17 #define pr_fmt(fmt)     "DMAR: " fmt
18
19 #include <linux/pci.h>
20 #include <linux/dmar.h>
21 #include <linux/iova.h>
22 #include <linux/intel-iommu.h>
23 #include <linux/timer.h>
24 #include <linux/irq.h>
25 #include <linux/interrupt.h>
26 #include <linux/tboot.h>
27 #include <linux/dmi.h>
28 #include <linux/slab.h>
29 #include <linux/iommu.h>
30 #include <linux/numa.h>
31 #include <linux/limits.h>
32 #include <asm/irq_remapping.h>
33 #include <asm/iommu_table.h>
34 #include <trace/events/intel_iommu.h>
35
36 #include "../irq_remapping.h"
37 #include "perf.h"
38
39 typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *);
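/*
 * Per-type dispatch table used by dmar_walk_remapping_entries(): one
 * callback and argument slot for each ACPI DMAR structure type.
 */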
40 struct dmar_res_callback {
41         dmar_res_handler_t      cb[ACPI_DMAR_TYPE_RESERVED];
42         void                    *arg[ACPI_DMAR_TYPE_RESERVED];
43         bool                    ignore_unhandled;
44         bool                    print_entry;
45 };
46
47 /*
48  * Assumptions:
49  * 1) The hotplug framework guarantees that a DMAR unit will be hot-added
50  *    before the I/O devices managed by that unit.
51  * 2) The hotplug framework guarantees that a DMAR unit will be hot-removed
52  *    after the I/O devices managed by that unit.
53  * 3) Hotplug events are rare.
54  *
55  * Locking rules for DMA and interrupt remapping related global data structures:
56  * 1) Use dmar_global_lock in process context
57  * 2) Use RCU in interrupt context
58  */
59 DECLARE_RWSEM(dmar_global_lock);
60 LIST_HEAD(dmar_drhd_units);
61
62 struct acpi_table_header * __initdata dmar_tbl;
63 static int dmar_dev_scope_status = 1;
64 static unsigned long dmar_seq_ids[BITS_TO_LONGS(DMAR_UNITS_SUPPORTED)];
65
66 static int alloc_iommu(struct dmar_drhd_unit *drhd);
67 static void free_iommu(struct intel_iommu *iommu);
68
69 extern const struct iommu_ops intel_iommu_ops;
70
71 static void dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
72 {
73         /*
74          * Add the INCLUDE_ALL unit at the tail, so a scan of the list will
75          * find it at the very end.
76          */
77         if (drhd->include_all)
78                 list_add_tail_rcu(&drhd->list, &dmar_drhd_units);
79         else
80                 list_add_rcu(&drhd->list, &dmar_drhd_units);
81 }
82
83 void *dmar_alloc_dev_scope(void *start, void *end, int *cnt)
84 {
85         struct acpi_dmar_device_scope *scope;
86
87         *cnt = 0;
88         while (start < end) {
89                 scope = start;
90                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_NAMESPACE ||
91                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
92                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
93                         (*cnt)++;
94                 else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC &&
95                         scope->entry_type != ACPI_DMAR_SCOPE_TYPE_HPET) {
96                         pr_warn("Unsupported device scope\n");
97                 }
98                 start += scope->length;
99         }
100         if (*cnt == 0)
101                 return NULL;
102
103         return kcalloc(*cnt, sizeof(struct dmar_dev_scope), GFP_KERNEL);
104 }
105
106 void dmar_free_dev_scope(struct dmar_dev_scope **devices, int *cnt)
107 {
108         int i;
109         struct device *tmp_dev;
110
111         if (*devices && *cnt) {
112                 for_each_active_dev_scope(*devices, *cnt, i, tmp_dev)
113                         put_device(tmp_dev);
114                 kfree(*devices);
115         }
116
117         *devices = NULL;
118         *cnt = 0;
119 }
120
121 /* Use a small static buffer to avoid kzalloc()/kfree() in the common case */
122 static char dmar_pci_notify_info_buf[64];
123
124 static struct dmar_pci_notify_info *
125 dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event)
126 {
127         int level = 0;
128         size_t size;
129         struct pci_dev *tmp;
130         struct dmar_pci_notify_info *info;
131
132         BUG_ON(dev->is_virtfn);
133
134         /*
135          * Ignore devices that have a domain number higher than what can
136          * be looked up in DMAR, e.g. VMD subdevices with domain 0x10000
137          */
138         if (pci_domain_nr(dev->bus) > U16_MAX)
139                 return NULL;
140
141         /* Only generate path[] for device addition event */
142         if (event == BUS_NOTIFY_ADD_DEVICE)
143                 for (tmp = dev; tmp; tmp = tmp->bus->self)
144                         level++;
145
146         size = struct_size(info, path, level);
147         if (size <= sizeof(dmar_pci_notify_info_buf)) {
148                 info = (struct dmar_pci_notify_info *)dmar_pci_notify_info_buf;
149         } else {
150                 info = kzalloc(size, GFP_KERNEL);
151                 if (!info) {
152                         if (dmar_dev_scope_status == 0)
153                                 dmar_dev_scope_status = -ENOMEM;
154                         return NULL;
155                 }
156         }
157
158         info->event = event;
159         info->dev = dev;
160         info->seg = pci_domain_nr(dev->bus);
161         info->level = level;
162         if (event == BUS_NOTIFY_ADD_DEVICE) {
163                 for (tmp = dev; tmp; tmp = tmp->bus->self) {
164                         level--;
165                         info->path[level].bus = tmp->bus->number;
166                         info->path[level].device = PCI_SLOT(tmp->devfn);
167                         info->path[level].function = PCI_FUNC(tmp->devfn);
168                         if (pci_is_root_bus(tmp->bus))
169                                 info->bus = tmp->bus->number;
170                 }
171         }
172
173         return info;
174 }
175
176 static inline void dmar_free_pci_notify_info(struct dmar_pci_notify_info *info)
177 {
178         if ((void *)info != dmar_pci_notify_info_buf)
179                 kfree(info);
180 }
181
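/*
 * Match the PCI path recorded in @info against a device-scope path that
 * starts at @bus.  If no exact match is found, fall back to comparing only
 * the last path element, to tolerate firmware that lists an RMRR device by
 * its final bus/device/function alone (see the FW_BUG workaround below).
 */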
182 static bool dmar_match_pci_path(struct dmar_pci_notify_info *info, int bus,
183                                 struct acpi_dmar_pci_path *path, int count)
184 {
185         int i;
186
187         if (info->bus != bus)
188                 goto fallback;
189         if (info->level != count)
190                 goto fallback;
191
192         for (i = 0; i < count; i++) {
193                 if (path[i].device != info->path[i].device ||
194                     path[i].function != info->path[i].function)
195                         goto fallback;
196         }
197
198         return true;
199
200 fallback:
201
202         if (count != 1)
203                 return false;
204
205         i = info->level - 1;
206         if (bus              == info->path[i].bus &&
207             path[0].device   == info->path[i].device &&
208             path[0].function == info->path[i].function) {
209                 pr_info(FW_BUG "RMRR entry for device %02x:%02x.%x is broken - applying workaround\n",
210                         bus, path[0].device, path[0].function);
211                 return true;
212         }
213
214         return false;
215 }
216
217 /* Return: > 0 if match found, 0 if no match found, < 0 on error */
218 int dmar_insert_dev_scope(struct dmar_pci_notify_info *info,
219                           void *start, void*end, u16 segment,
220                           struct dmar_dev_scope *devices,
221                           int devices_cnt)
222 {
223         int i, level;
224         struct device *tmp, *dev = &info->dev->dev;
225         struct acpi_dmar_device_scope *scope;
226         struct acpi_dmar_pci_path *path;
227
228         if (segment != info->seg)
229                 return 0;
230
231         for (; start < end; start += scope->length) {
232                 scope = start;
233                 if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
234                     scope->entry_type != ACPI_DMAR_SCOPE_TYPE_BRIDGE)
235                         continue;
236
237                 path = (struct acpi_dmar_pci_path *)(scope + 1);
238                 level = (scope->length - sizeof(*scope)) / sizeof(*path);
239                 if (!dmar_match_pci_path(info, scope->bus, path, level))
240                         continue;
241
242                 /*
243                  * We expect devices with endpoint scope to have normal PCI
244                  * headers, and devices with bridge scope to have bridge PCI
245                  * headers.  However PCI NTB devices may be listed in the
246                  * DMAR table with bridge scope, even though they have a
247                  * normal PCI header.  NTB devices are identified by class
248  * "BRIDGE_OTHER" (0680h) - we don't declare a scope mismatch
249                  * for this special case.
250                  */
251                 if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
252                      info->dev->hdr_type != PCI_HEADER_TYPE_NORMAL) ||
253                     (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE &&
254                      (info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
255                       info->dev->class >> 16 != PCI_BASE_CLASS_BRIDGE))) {
256                         pr_warn("Device scope type does not match for %s\n",
257                                 pci_name(info->dev));
258                         return -EINVAL;
259                 }
260
261                 for_each_dev_scope(devices, devices_cnt, i, tmp)
262                         if (tmp == NULL) {
263                                 devices[i].bus = info->dev->bus->number;
264                                 devices[i].devfn = info->dev->devfn;
265                                 rcu_assign_pointer(devices[i].dev,
266                                                    get_device(dev));
267                                 return 1;
268                         }
269                 BUG_ON(i >= devices_cnt);
270         }
271
272         return 0;
273 }
274
275 int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, u16 segment,
276                           struct dmar_dev_scope *devices, int count)
277 {
278         int index;
279         struct device *tmp;
280
281         if (info->seg != segment)
282                 return 0;
283
284         for_each_active_dev_scope(devices, count, index, tmp)
285                 if (tmp == &info->dev->dev) {
286                         RCU_INIT_POINTER(devices[index].dev, NULL);
287                         synchronize_rcu();
288                         put_device(tmp);
289                         return 1;
290                 }
291
292         return 0;
293 }
294
295 static int dmar_pci_bus_add_dev(struct dmar_pci_notify_info *info)
296 {
297         int ret = 0;
298         struct dmar_drhd_unit *dmaru;
299         struct acpi_dmar_hardware_unit *drhd;
300
301         for_each_drhd_unit(dmaru) {
302                 if (dmaru->include_all)
303                         continue;
304
305                 drhd = container_of(dmaru->hdr,
306                                     struct acpi_dmar_hardware_unit, header);
307                 ret = dmar_insert_dev_scope(info, (void *)(drhd + 1),
308                                 ((void *)drhd) + drhd->header.length,
309                                 dmaru->segment,
310                                 dmaru->devices, dmaru->devices_cnt);
311                 if (ret)
312                         break;
313         }
314         if (ret >= 0)
315                 ret = dmar_iommu_notify_scope_dev(info);
316         if (ret < 0 && dmar_dev_scope_status == 0)
317                 dmar_dev_scope_status = ret;
318
319         if (ret >= 0)
320                 intel_irq_remap_add_device(info);
321
322         return ret;
323 }
324
325 static void  dmar_pci_bus_del_dev(struct dmar_pci_notify_info *info)
326 {
327         struct dmar_drhd_unit *dmaru;
328
329         for_each_drhd_unit(dmaru)
330                 if (dmar_remove_dev_scope(info, dmaru->segment,
331                         dmaru->devices, dmaru->devices_cnt))
332                         break;
333         dmar_iommu_notify_scope_dev(info);
334 }
335
336 static inline void vf_inherit_msi_domain(struct pci_dev *pdev)
337 {
338         struct pci_dev *physfn = pci_physfn(pdev);
339
340         dev_set_msi_domain(&pdev->dev, dev_get_msi_domain(&physfn->dev));
341 }
342
343 static int dmar_pci_bus_notifier(struct notifier_block *nb,
344                                  unsigned long action, void *data)
345 {
346         struct pci_dev *pdev = to_pci_dev(data);
347         struct dmar_pci_notify_info *info;
348
349         /* Only care about add/remove events for physical functions.
350          * For VFs we actually do the lookup based on the corresponding
351          * PF in device_to_iommu() anyway. */
352         if (pdev->is_virtfn) {
353                 /*
354                  * Ensure that the VF device inherits the irq domain of the
355                  * PF device. Ideally the device would inherit the domain
356                  * from the bus, but DMAR can have multiple units per bus
357                  * which makes this impossible. The VF 'bus' could inherit
358                  * from the PF device, but that's yet another x86'sism to
359                  * inflict on everybody else.
360                  */
361                 if (action == BUS_NOTIFY_ADD_DEVICE)
362                         vf_inherit_msi_domain(pdev);
363                 return NOTIFY_DONE;
364         }
365
366         if (action != BUS_NOTIFY_ADD_DEVICE &&
367             action != BUS_NOTIFY_REMOVED_DEVICE)
368                 return NOTIFY_DONE;
369
370         info = dmar_alloc_pci_notify_info(pdev, action);
371         if (!info)
372                 return NOTIFY_DONE;
373
374         down_write(&dmar_global_lock);
375         if (action == BUS_NOTIFY_ADD_DEVICE)
376                 dmar_pci_bus_add_dev(info);
377         else if (action == BUS_NOTIFY_REMOVED_DEVICE)
378                 dmar_pci_bus_del_dev(info);
379         up_write(&dmar_global_lock);
380
381         dmar_free_pci_notify_info(info);
382
383         return NOTIFY_OK;
384 }
385
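/*
 * The INT_MIN priority keeps this notifier at the tail of the chain, so it
 * is called only after the other PCI bus notifiers have seen the event.
 */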
386 static struct notifier_block dmar_pci_bus_nb = {
387         .notifier_call = dmar_pci_bus_notifier,
388         .priority = INT_MIN,
389 };
390
391 static struct dmar_drhd_unit *
392 dmar_find_dmaru(struct acpi_dmar_hardware_unit *drhd)
393 {
394         struct dmar_drhd_unit *dmaru;
395
396         list_for_each_entry_rcu(dmaru, &dmar_drhd_units, list,
397                                 dmar_rcu_check())
398                 if (dmaru->segment == drhd->segment &&
399                     dmaru->reg_base_addr == drhd->address)
400                         return dmaru;
401
402         return NULL;
403 }
404
405 /*
406  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
407  * structure which uniquely represents one DMA remapping hardware unit
408  * present in the platform.
409  */
410 static int dmar_parse_one_drhd(struct acpi_dmar_header *header, void *arg)
411 {
412         struct acpi_dmar_hardware_unit *drhd;
413         struct dmar_drhd_unit *dmaru;
414         int ret;
415
416         drhd = (struct acpi_dmar_hardware_unit *)header;
417         dmaru = dmar_find_dmaru(drhd);
418         if (dmaru)
419                 goto out;
420
421         dmaru = kzalloc(sizeof(*dmaru) + header->length, GFP_KERNEL);
422         if (!dmaru)
423                 return -ENOMEM;
424
425         /*
426          * If the header was allocated from slab by an ACPI _DSM method, copy
427          * its contents because the memory buffer will be freed on return.
428          */
429         dmaru->hdr = (void *)(dmaru + 1);
430         memcpy(dmaru->hdr, header, header->length);
431         dmaru->reg_base_addr = drhd->address;
432         dmaru->segment = drhd->segment;
433         dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
434         dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1),
435                                               ((void *)drhd) + drhd->header.length,
436                                               &dmaru->devices_cnt);
437         if (dmaru->devices_cnt && dmaru->devices == NULL) {
438                 kfree(dmaru);
439                 return -ENOMEM;
440         }
441
442         ret = alloc_iommu(dmaru);
443         if (ret) {
444                 dmar_free_dev_scope(&dmaru->devices,
445                                     &dmaru->devices_cnt);
446                 kfree(dmaru);
447                 return ret;
448         }
449         dmar_register_drhd_unit(dmaru);
450
451 out:
452         if (arg)
453                 (*(int *)arg)++;
454
455         return 0;
456 }
457
458 static void dmar_free_drhd(struct dmar_drhd_unit *dmaru)
459 {
460         if (dmaru->devices && dmaru->devices_cnt)
461                 dmar_free_dev_scope(&dmaru->devices, &dmaru->devices_cnt);
462         if (dmaru->iommu)
463                 free_iommu(dmaru->iommu);
464         kfree(dmaru);
465 }
466
467 static int __init dmar_parse_one_andd(struct acpi_dmar_header *header,
468                                       void *arg)
469 {
470         struct acpi_dmar_andd *andd = (void *)header;
471
472         /* Check for NUL termination within the designated length */
473         if (strnlen(andd->device_name, header->length - 8) == header->length - 8) {
474                 pr_warn(FW_BUG
475                            "Your BIOS is broken; ANDD object name is not NUL-terminated\n"
476                            "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
477                            dmi_get_system_info(DMI_BIOS_VENDOR),
478                            dmi_get_system_info(DMI_BIOS_VERSION),
479                            dmi_get_system_info(DMI_PRODUCT_VERSION));
480                 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
481                 return -EINVAL;
482         }
483         pr_info("ANDD device: %x name: %s\n", andd->device_number,
484                 andd->device_name);
485
486         return 0;
487 }
488
489 #ifdef CONFIG_ACPI_NUMA
490 static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg)
491 {
492         struct acpi_dmar_rhsa *rhsa;
493         struct dmar_drhd_unit *drhd;
494
495         rhsa = (struct acpi_dmar_rhsa *)header;
496         for_each_drhd_unit(drhd) {
497                 if (drhd->reg_base_addr == rhsa->base_address) {
498                         int node = pxm_to_node(rhsa->proximity_domain);
499
500                         if (!node_online(node))
501                                 node = NUMA_NO_NODE;
502                         drhd->iommu->node = node;
503                         return 0;
504                 }
505         }
506         pr_warn(FW_BUG
507                 "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
508                 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
509                 rhsa->base_address,
510                 dmi_get_system_info(DMI_BIOS_VENDOR),
511                 dmi_get_system_info(DMI_BIOS_VERSION),
512                 dmi_get_system_info(DMI_PRODUCT_VERSION));
513         add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
514
515         return 0;
516 }
517 #else
518 #define dmar_parse_one_rhsa             dmar_res_noop
519 #endif
520
521 static void
522 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
523 {
524         struct acpi_dmar_hardware_unit *drhd;
525         struct acpi_dmar_reserved_memory *rmrr;
526         struct acpi_dmar_atsr *atsr;
527         struct acpi_dmar_rhsa *rhsa;
528         struct acpi_dmar_satc *satc;
529
530         switch (header->type) {
531         case ACPI_DMAR_TYPE_HARDWARE_UNIT:
532                 drhd = container_of(header, struct acpi_dmar_hardware_unit,
533                                     header);
534                 pr_info("DRHD base: %#016Lx flags: %#x\n",
535                         (unsigned long long)drhd->address, drhd->flags);
536                 break;
537         case ACPI_DMAR_TYPE_RESERVED_MEMORY:
538                 rmrr = container_of(header, struct acpi_dmar_reserved_memory,
539                                     header);
540                 pr_info("RMRR base: %#016Lx end: %#016Lx\n",
541                         (unsigned long long)rmrr->base_address,
542                         (unsigned long long)rmrr->end_address);
543                 break;
544         case ACPI_DMAR_TYPE_ROOT_ATS:
545                 atsr = container_of(header, struct acpi_dmar_atsr, header);
546                 pr_info("ATSR flags: %#x\n", atsr->flags);
547                 break;
548         case ACPI_DMAR_TYPE_HARDWARE_AFFINITY:
549                 rhsa = container_of(header, struct acpi_dmar_rhsa, header);
550                 pr_info("RHSA base: %#016Lx proximity domain: %#x\n",
551                        (unsigned long long)rhsa->base_address,
552                        rhsa->proximity_domain);
553                 break;
554         case ACPI_DMAR_TYPE_NAMESPACE:
555                 /* We don't print this here because we need to sanity-check
556                    it first. So print it in dmar_parse_one_andd() instead. */
557                 break;
558         case ACPI_DMAR_TYPE_SATC:
559                 satc = container_of(header, struct acpi_dmar_satc, header);
560                 pr_info("SATC flags: 0x%x\n", satc->flags);
561                 break;
562         }
563 }
564
565 /**
566  * dmar_table_detect - checks to see if the platform supports DMAR devices
567  */
568 static int __init dmar_table_detect(void)
569 {
570         acpi_status status = AE_OK;
571
572         /* If we can find the DMAR table, then there are DMAR devices. */
573         status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl);
574
575         if (ACPI_SUCCESS(status) && !dmar_tbl) {
576                 pr_warn("Unable to map DMAR\n");
577                 status = AE_NOT_FOUND;
578         }
579
580         return ACPI_SUCCESS(status) ? 0 : -ENOENT;
581 }
582
583 static int dmar_walk_remapping_entries(struct acpi_dmar_header *start,
584                                        size_t len, struct dmar_res_callback *cb)
585 {
586         struct acpi_dmar_header *iter, *next;
587         struct acpi_dmar_header *end = ((void *)start) + len;
588
589         for (iter = start; iter < end; iter = next) {
590                 next = (void *)iter + iter->length;
591                 if (iter->length == 0) {
592                         /* Avoid looping forever on bad ACPI tables */
593                         pr_debug(FW_BUG "Invalid 0-length structure\n");
594                         break;
595                 } else if (next > end) {
596                         /* Avoid passing table end */
597                         pr_warn(FW_BUG "Record passes table end\n");
598                         return -EINVAL;
599                 }
600
601                 if (cb->print_entry)
602                         dmar_table_print_dmar_entry(iter);
603
604                 if (iter->type >= ACPI_DMAR_TYPE_RESERVED) {
605                         /* continue for forward compatibility */
606                         pr_debug("Unknown DMAR structure type %d\n",
607                                  iter->type);
608                 } else if (cb->cb[iter->type]) {
609                         int ret;
610
611                         ret = cb->cb[iter->type](iter, cb->arg[iter->type]);
612                         if (ret)
613                                 return ret;
614                 } else if (!cb->ignore_unhandled) {
615                         pr_warn("No handler for DMAR structure type %d\n",
616                                 iter->type);
617                         return -EINVAL;
618                 }
619         }
620
621         return 0;
622 }
623
624 static inline int dmar_walk_dmar_table(struct acpi_table_dmar *dmar,
625                                        struct dmar_res_callback *cb)
626 {
627         return dmar_walk_remapping_entries((void *)(dmar + 1),
628                         dmar->header.length - sizeof(*dmar), cb);
629 }
630
631 /**
632  * parse_dmar_table - parses the DMA reporting table
633  */
634 static int __init
635 parse_dmar_table(void)
636 {
637         struct acpi_table_dmar *dmar;
638         int drhd_count = 0;
639         int ret;
640         struct dmar_res_callback cb = {
641                 .print_entry = true,
642                 .ignore_unhandled = true,
643                 .arg[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &drhd_count,
644                 .cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_parse_one_drhd,
645                 .cb[ACPI_DMAR_TYPE_RESERVED_MEMORY] = &dmar_parse_one_rmrr,
646                 .cb[ACPI_DMAR_TYPE_ROOT_ATS] = &dmar_parse_one_atsr,
647                 .cb[ACPI_DMAR_TYPE_HARDWARE_AFFINITY] = &dmar_parse_one_rhsa,
648                 .cb[ACPI_DMAR_TYPE_NAMESPACE] = &dmar_parse_one_andd,
649                 .cb[ACPI_DMAR_TYPE_SATC] = &dmar_parse_one_satc,
650         };
651
652         /*
653          * Do it again; the earlier dmar_tbl mapping could have been done
654          * with the fixed map.
655          */
656         dmar_table_detect();
657
658         /*
659          * ACPI tables may not be DMA protected by tboot, so use the DMAR copy
660          * that SINIT saved in SinitMleData in the TXT heap (which is DMA protected).
661          */
662         dmar_tbl = tboot_get_dmar_table(dmar_tbl);
663
664         dmar = (struct acpi_table_dmar *)dmar_tbl;
665         if (!dmar)
666                 return -ENODEV;
667
668         if (dmar->width < PAGE_SHIFT - 1) {
669                 pr_warn("Invalid DMAR haw\n");
670                 return -EINVAL;
671         }
672
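        /*
         * dmar->width holds the host address width minus one (e.g. a
         * 39-bit HAW is reported as 38), hence the "+ 1" when printing
         * it below.
         */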
673         pr_info("Host address width %d\n", dmar->width + 1);
674         ret = dmar_walk_dmar_table(dmar, &cb);
675         if (ret == 0 && drhd_count == 0)
676                 pr_warn(FW_BUG "No DRHD structure found in DMAR table\n");
677
678         return ret;
679 }
680
681 static int dmar_pci_device_match(struct dmar_dev_scope devices[],
682                                  int cnt, struct pci_dev *dev)
683 {
684         int index;
685         struct device *tmp;
686
687         while (dev) {
688                 for_each_active_dev_scope(devices, cnt, index, tmp)
689                         if (dev_is_pci(tmp) && dev == to_pci_dev(tmp))
690                                 return 1;
691
692                 /* Check our parent */
693                 dev = dev->bus->self;
694         }
695
696         return 0;
697 }
698
699 struct dmar_drhd_unit *
700 dmar_find_matched_drhd_unit(struct pci_dev *dev)
701 {
702         struct dmar_drhd_unit *dmaru;
703         struct acpi_dmar_hardware_unit *drhd;
704
705         dev = pci_physfn(dev);
706
707         rcu_read_lock();
708         for_each_drhd_unit(dmaru) {
709                 drhd = container_of(dmaru->hdr,
710                                     struct acpi_dmar_hardware_unit,
711                                     header);
712
713                 if (dmaru->include_all &&
714                     drhd->segment == pci_domain_nr(dev->bus))
715                         goto out;
716
717                 if (dmar_pci_device_match(dmaru->devices,
718                                           dmaru->devices_cnt, dev))
719                         goto out;
720         }
721         dmaru = NULL;
722 out:
723         rcu_read_unlock();
724
725         return dmaru;
726 }
727
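/*
 * Record an ACPI namespace device (from an ANDD entry) in the device array
 * of the DRHD whose scope lists its enumeration ID, so that later scope
 * lookups can find it.
 */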
728 static void __init dmar_acpi_insert_dev_scope(u8 device_number,
729                                               struct acpi_device *adev)
730 {
731         struct dmar_drhd_unit *dmaru;
732         struct acpi_dmar_hardware_unit *drhd;
733         struct acpi_dmar_device_scope *scope;
734         struct device *tmp;
735         int i;
736         struct acpi_dmar_pci_path *path;
737
738         for_each_drhd_unit(dmaru) {
739                 drhd = container_of(dmaru->hdr,
740                                     struct acpi_dmar_hardware_unit,
741                                     header);
742
743                 for (scope = (void *)(drhd + 1);
744                      (unsigned long)scope < ((unsigned long)drhd) + drhd->header.length;
745                      scope = ((void *)scope) + scope->length) {
746                         if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_NAMESPACE)
747                                 continue;
748                         if (scope->enumeration_id != device_number)
749                                 continue;
750
751                         path = (void *)(scope + 1);
752                         pr_info("ACPI device \"%s\" under DMAR at %llx as %02x:%02x.%d\n",
753                                 dev_name(&adev->dev), dmaru->reg_base_addr,
754                                 scope->bus, path->device, path->function);
755                         for_each_dev_scope(dmaru->devices, dmaru->devices_cnt, i, tmp)
756                                 if (tmp == NULL) {
757                                         dmaru->devices[i].bus = scope->bus;
758                                         dmaru->devices[i].devfn = PCI_DEVFN(path->device,
759                                                                             path->function);
760                                         rcu_assign_pointer(dmaru->devices[i].dev,
761                                                            get_device(&adev->dev));
762                                         return;
763                                 }
764                         BUG_ON(i >= dmaru->devices_cnt);
765                 }
766         }
767         pr_warn("No IOMMU scope found for ANDD enumeration ID %d (%s)\n",
768                 device_number, dev_name(&adev->dev));
769 }
770
771 static int __init dmar_acpi_dev_scope_init(void)
772 {
773         struct acpi_dmar_andd *andd;
774
775         if (dmar_tbl == NULL)
776                 return -ENODEV;
777
778         for (andd = (void *)dmar_tbl + sizeof(struct acpi_table_dmar);
779              ((unsigned long)andd) < ((unsigned long)dmar_tbl) + dmar_tbl->length;
780              andd = ((void *)andd) + andd->header.length) {
781                 if (andd->header.type == ACPI_DMAR_TYPE_NAMESPACE) {
782                         acpi_handle h;
783                         struct acpi_device *adev;
784
785                         if (!ACPI_SUCCESS(acpi_get_handle(ACPI_ROOT_OBJECT,
786                                                           andd->device_name,
787                                                           &h))) {
788                                 pr_err("Failed to find handle for ACPI object %s\n",
789                                        andd->device_name);
790                                 continue;
791                         }
792                         if (acpi_bus_get_device(h, &adev)) {
793                                 pr_err("Failed to get device for ACPI object %s\n",
794                                        andd->device_name);
795                                 continue;
796                         }
797                         dmar_acpi_insert_dev_scope(andd->device_number, adev);
798                 }
799         }
800         return 0;
801 }
802
803 int __init dmar_dev_scope_init(void)
804 {
805         struct pci_dev *dev = NULL;
806         struct dmar_pci_notify_info *info;
807
808         if (dmar_dev_scope_status != 1)
809                 return dmar_dev_scope_status;
810
811         if (list_empty(&dmar_drhd_units)) {
812                 dmar_dev_scope_status = -ENODEV;
813         } else {
814                 dmar_dev_scope_status = 0;
815
816                 dmar_acpi_dev_scope_init();
817
818                 for_each_pci_dev(dev) {
819                         if (dev->is_virtfn)
820                                 continue;
821
822                         info = dmar_alloc_pci_notify_info(dev,
823                                         BUS_NOTIFY_ADD_DEVICE);
824                         if (!info) {
825                                 return dmar_dev_scope_status;
826                         } else {
827                                 dmar_pci_bus_add_dev(info);
828                                 dmar_free_pci_notify_info(info);
829                         }
830                 }
831         }
832
833         return dmar_dev_scope_status;
834 }
835
836 void __init dmar_register_bus_notifier(void)
837 {
838         bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb);
839 }
840
841
842 int __init dmar_table_init(void)
843 {
844         static int dmar_table_initialized;
845         int ret;
846
847         if (dmar_table_initialized == 0) {
848                 ret = parse_dmar_table();
849                 if (ret < 0) {
850                         if (ret != -ENODEV)
851                                 pr_info("Parse DMAR table failure.\n");
852                 } else  if (list_empty(&dmar_drhd_units)) {
853                         pr_info("No DMAR devices found\n");
854                         ret = -ENODEV;
855                 }
856
857                 if (ret < 0)
858                         dmar_table_initialized = ret;
859                 else
860                         dmar_table_initialized = 1;
861         }
862
863         return dmar_table_initialized < 0 ? dmar_table_initialized : 0;
864 }
865
866 static void warn_invalid_dmar(u64 addr, const char *message)
867 {
868         pr_warn_once(FW_BUG
869                 "Your BIOS is broken; DMAR reported at address %llx%s!\n"
870                 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
871                 addr, message,
872                 dmi_get_system_info(DMI_BIOS_VENDOR),
873                 dmi_get_system_info(DMI_BIOS_VERSION),
874                 dmi_get_system_info(DMI_PRODUCT_VERSION));
875         add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
876 }
877
878 static int __ref
879 dmar_validate_one_drhd(struct acpi_dmar_header *entry, void *arg)
880 {
881         struct acpi_dmar_hardware_unit *drhd;
882         void __iomem *addr;
883         u64 cap, ecap;
884
885         drhd = (void *)entry;
886         if (!drhd->address) {
887                 warn_invalid_dmar(0, "");
888                 return -EINVAL;
889         }
890
891         if (arg)
892                 addr = ioremap(drhd->address, VTD_PAGE_SIZE);
893         else
894                 addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
895         if (!addr) {
896                 pr_warn("Can't validate DRHD address: %llx\n", drhd->address);
897                 return -EINVAL;
898         }
899
900         cap = dmar_readq(addr + DMAR_CAP_REG);
901         ecap = dmar_readq(addr + DMAR_ECAP_REG);
902
903         if (arg)
904                 iounmap(addr);
905         else
906                 early_iounmap(addr, VTD_PAGE_SIZE);
907
908         if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
909                 warn_invalid_dmar(drhd->address, " returns all ones");
910                 return -EINVAL;
911         }
912
913         return 0;
914 }
915
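/*
 * Probe for the DMAR ACPI table and sanity-check each reported DRHD by
 * reading its capability registers.  Returns a negative errno on failure,
 * otherwise 1 once detection has run (regardless of whether the IOMMU
 * will actually be used).
 */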
916 int __init detect_intel_iommu(void)
917 {
918         int ret;
919         struct dmar_res_callback validate_drhd_cb = {
920                 .cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_validate_one_drhd,
921                 .ignore_unhandled = true,
922         };
923
924         down_write(&dmar_global_lock);
925         ret = dmar_table_detect();
926         if (!ret)
927                 ret = dmar_walk_dmar_table((struct acpi_table_dmar *)dmar_tbl,
928                                            &validate_drhd_cb);
929         if (!ret && !no_iommu && !iommu_detected &&
930             (!dmar_disabled || dmar_platform_optin())) {
931                 iommu_detected = 1;
932                 /* Make sure ACS will be enabled */
933                 pci_request_acs();
934         }
935
936 #ifdef CONFIG_X86
937         if (!ret) {
938                 x86_init.iommu.iommu_init = intel_iommu_init;
939                 x86_platform.iommu_shutdown = intel_iommu_shutdown;
940         }
941
942 #endif
943
944         if (dmar_tbl) {
945                 acpi_put_table(dmar_tbl);
946                 dmar_tbl = NULL;
947         }
948         up_write(&dmar_global_lock);
949
950         return ret ? ret : 1;
951 }
952
953 static void unmap_iommu(struct intel_iommu *iommu)
954 {
955         iounmap(iommu->reg);
956         release_mem_region(iommu->reg_phys, iommu->reg_size);
957 }
958
959 /**
960  * map_iommu: map the iommu's registers
961  * @iommu: the iommu to map
962  * @phys_addr: the physical address of the base register
963  *
964  * Memory map the iommu's registers.  Start with a single page, and
965  * possibly expand if that turns out to be insufficient.
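 *
 * Return: 0 on success, or a negative errno if the register window cannot
 * be reserved or mapped, or if the unit's registers read back as all ones.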
966  */
967 static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
968 {
969         int map_size, err=0;
970
971         iommu->reg_phys = phys_addr;
972         iommu->reg_size = VTD_PAGE_SIZE;
973
974         if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) {
975                 pr_err("Can't reserve memory\n");
976                 err = -EBUSY;
977                 goto out;
978         }
979
980         iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
981         if (!iommu->reg) {
982                 pr_err("Can't map the region\n");
983                 err = -ENOMEM;
984                 goto release;
985         }
986
987         iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
988         iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
989
990         if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
991                 err = -EINVAL;
992                 warn_invalid_dmar(phys_addr, " returns all ones");
993                 goto unmap;
994         }
995         if (ecap_vcs(iommu->ecap))
996                 iommu->vccap = dmar_readq(iommu->reg + DMAR_VCCAP_REG);
997
998         /* the register set might span more than one page */
999         map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
1000                          cap_max_fault_reg_offset(iommu->cap));
1001         map_size = VTD_PAGE_ALIGN(map_size);
1002         if (map_size > iommu->reg_size) {
1003                 iounmap(iommu->reg);
1004                 release_mem_region(iommu->reg_phys, iommu->reg_size);
1005                 iommu->reg_size = map_size;
1006                 if (!request_mem_region(iommu->reg_phys, iommu->reg_size,
1007                                         iommu->name)) {
1008                         pr_err("Can't reserve memory\n");
1009                         err = -EBUSY;
1010                         goto out;
1011                 }
1012                 iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
1013                 if (!iommu->reg) {
1014                         pr_err("Can't map the region\n");
1015                         err = -ENOMEM;
1016                         goto release;
1017                 }
1018         }
1019         err = 0;
1020         goto out;
1021
1022 unmap:
1023         iounmap(iommu->reg);
1024 release:
1025         release_mem_region(iommu->reg_phys, iommu->reg_size);
1026 out:
1027         return err;
1028 }
1029
1030 static int dmar_alloc_seq_id(struct intel_iommu *iommu)
1031 {
1032         iommu->seq_id = find_first_zero_bit(dmar_seq_ids,
1033                                             DMAR_UNITS_SUPPORTED);
1034         if (iommu->seq_id >= DMAR_UNITS_SUPPORTED) {
1035                 iommu->seq_id = -1;
1036         } else {
1037                 set_bit(iommu->seq_id, dmar_seq_ids);
1038                 sprintf(iommu->name, "dmar%d", iommu->seq_id);
1039         }
1040
1041         return iommu->seq_id;
1042 }
1043
1044 static void dmar_free_seq_id(struct intel_iommu *iommu)
1045 {
1046         if (iommu->seq_id >= 0) {
1047                 clear_bit(iommu->seq_id, dmar_seq_ids);
1048                 iommu->seq_id = -1;
1049         }
1050 }
1051
1052 static int alloc_iommu(struct dmar_drhd_unit *drhd)
1053 {
1054         struct intel_iommu *iommu;
1055         u32 ver, sts;
1056         int agaw = -1;
1057         int msagaw = -1;
1058         int err;
1059
1060         if (!drhd->reg_base_addr) {
1061                 warn_invalid_dmar(0, "");
1062                 return -EINVAL;
1063         }
1064
1065         iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
1066         if (!iommu)
1067                 return -ENOMEM;
1068
1069         if (dmar_alloc_seq_id(iommu) < 0) {
1070                 pr_err("Failed to allocate seq_id\n");
1071                 err = -ENOSPC;
1072                 goto error;
1073         }
1074
1075         err = map_iommu(iommu, drhd->reg_base_addr);
1076         if (err) {
1077                 pr_err("Failed to map %s\n", iommu->name);
1078                 goto error_free_seq_id;
1079         }
1080
1081         err = -EINVAL;
1082         if (cap_sagaw(iommu->cap) == 0) {
1083                 pr_info("%s: No supported address widths. Not attempting DMA translation.\n",
1084                         iommu->name);
1085                 drhd->ignored = 1;
1086         }
1087
1088         if (!drhd->ignored) {
1089                 agaw = iommu_calculate_agaw(iommu);
1090                 if (agaw < 0) {
1091                         pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
1092                                iommu->seq_id);
1093                         drhd->ignored = 1;
1094                 }
1095         }
1096         if (!drhd->ignored) {
1097                 msagaw = iommu_calculate_max_sagaw(iommu);
1098                 if (msagaw < 0) {
1099                         pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n",
1100                                iommu->seq_id);
1101                         drhd->ignored = 1;
1102                         agaw = -1;
1103                 }
1104         }
1105         iommu->agaw = agaw;
1106         iommu->msagaw = msagaw;
1107         iommu->segment = drhd->segment;
1108
1109         iommu->node = NUMA_NO_NODE;
1110
1111         ver = readl(iommu->reg + DMAR_VER_REG);
1112         pr_info("%s: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
1113                 iommu->name,
1114                 (unsigned long long)drhd->reg_base_addr,
1115                 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
1116                 (unsigned long long)iommu->cap,
1117                 (unsigned long long)iommu->ecap);
1118
1119         /* Reflect status in gcmd */
1120         sts = readl(iommu->reg + DMAR_GSTS_REG);
1121         if (sts & DMA_GSTS_IRES)
1122                 iommu->gcmd |= DMA_GCMD_IRE;
1123         if (sts & DMA_GSTS_TES)
1124                 iommu->gcmd |= DMA_GCMD_TE;
1125         if (sts & DMA_GSTS_QIES)
1126                 iommu->gcmd |= DMA_GCMD_QIE;
1127
1128         raw_spin_lock_init(&iommu->register_lock);
1129
1130         /*
1131          * This is only for hotplug; at boot time intel_iommu_enabled won't
1132          * be set yet. When intel_iommu_init() runs, it registers the units
1133          * present at boot time, then sets intel_iommu_enabled.
1134          */
1135         if (intel_iommu_enabled && !drhd->ignored) {
1136                 err = iommu_device_sysfs_add(&iommu->iommu, NULL,
1137                                              intel_iommu_groups,
1138                                              "%s", iommu->name);
1139                 if (err)
1140                         goto err_unmap;
1141
1142                 err = iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL);
1143                 if (err)
1144                         goto err_sysfs;
1145         }
1146
1147         drhd->iommu = iommu;
1148         iommu->drhd = drhd;
1149
1150         return 0;
1151
1152 err_sysfs:
1153         iommu_device_sysfs_remove(&iommu->iommu);
1154 err_unmap:
1155         unmap_iommu(iommu);
1156 error_free_seq_id:
1157         dmar_free_seq_id(iommu);
1158 error:
1159         kfree(iommu);
1160         return err;
1161 }
1162
1163 static void free_iommu(struct intel_iommu *iommu)
1164 {
1165         if (intel_iommu_enabled && !iommu->drhd->ignored) {
1166                 iommu_device_unregister(&iommu->iommu);
1167                 iommu_device_sysfs_remove(&iommu->iommu);
1168         }
1169
1170         if (iommu->irq) {
1171                 if (iommu->pr_irq) {
1172                         free_irq(iommu->pr_irq, iommu);
1173                         dmar_free_hwirq(iommu->pr_irq);
1174                         iommu->pr_irq = 0;
1175                 }
1176                 free_irq(iommu->irq, iommu);
1177                 dmar_free_hwirq(iommu->irq);
1178                 iommu->irq = 0;
1179         }
1180
1181         if (iommu->qi) {
1182                 free_page((unsigned long)iommu->qi->desc);
1183                 kfree(iommu->qi->desc_status);
1184                 kfree(iommu->qi);
1185         }
1186
1187         if (iommu->reg)
1188                 unmap_iommu(iommu);
1189
1190         dmar_free_seq_id(iommu);
1191         kfree(iommu);
1192 }
1193
1194 /*
1195  * Reclaim all the submitted descriptors which have completed their work.
1196  */
1197 static inline void reclaim_free_desc(struct q_inval *qi)
1198 {
1199         while (qi->desc_status[qi->free_tail] == QI_DONE ||
1200                qi->desc_status[qi->free_tail] == QI_ABORT) {
1201                 qi->desc_status[qi->free_tail] = QI_FREE;
1202                 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
1203                 qi->free_cnt++;
1204         }
1205 }
1206
1207 static const char *qi_type_string(u8 type)
1208 {
1209         switch (type) {
1210         case QI_CC_TYPE:
1211                 return "Context-cache Invalidation";
1212         case QI_IOTLB_TYPE:
1213                 return "IOTLB Invalidation";
1214         case QI_DIOTLB_TYPE:
1215                 return "Device-TLB Invalidation";
1216         case QI_IEC_TYPE:
1217                 return "Interrupt Entry Cache Invalidation";
1218         case QI_IWD_TYPE:
1219                 return "Invalidation Wait";
1220         case QI_EIOTLB_TYPE:
1221                 return "PASID-based IOTLB Invalidation";
1222         case QI_PC_TYPE:
1223                 return "PASID-cache Invalidation";
1224         case QI_DEIOTLB_TYPE:
1225                 return "PASID-based Device-TLB Invalidation";
1226         case QI_PGRP_RESP_TYPE:
1227                 return "Page Group Response";
1228         default:
1229                 return "UNKNOWN";
1230         }
1231 }
1232
1233 static void qi_dump_fault(struct intel_iommu *iommu, u32 fault)
1234 {
1235         unsigned int head = dmar_readl(iommu->reg + DMAR_IQH_REG);
1236         u64 iqe_err = dmar_readq(iommu->reg + DMAR_IQER_REG);
1237         struct qi_desc *desc = iommu->qi->desc + head;
1238
1239         if (fault & DMA_FSTS_IQE)
1240                 pr_err("VT-d detected Invalidation Queue Error: Reason %llx",
1241                        DMAR_IQER_REG_IQEI(iqe_err));
1242         if (fault & DMA_FSTS_ITE)
1243                 pr_err("VT-d detected Invalidation Time-out Error: SID %llx",
1244                        DMAR_IQER_REG_ITESID(iqe_err));
1245         if (fault & DMA_FSTS_ICE)
1246                 pr_err("VT-d detected Invalidation Completion Error: SID %llx",
1247                        DMAR_IQER_REG_ICESID(iqe_err));
1248
1249         pr_err("QI HEAD: %s qw0 = 0x%llx, qw1 = 0x%llx\n",
1250                qi_type_string(desc->qw0 & 0xf),
1251                (unsigned long long)desc->qw0,
1252                (unsigned long long)desc->qw1);
1253
1254         head = ((head >> qi_shift(iommu)) + QI_LENGTH - 1) % QI_LENGTH;
1255         head <<= qi_shift(iommu);
1256         desc = iommu->qi->desc + head;
1257
1258         pr_err("QI PRIOR: %s qw0 = 0x%llx, qw1 = 0x%llx\n",
1259                qi_type_string(desc->qw0 & 0xf),
1260                (unsigned long long)desc->qw0,
1261                (unsigned long long)desc->qw1);
1262 }
1263
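/*
 * Check the invalidation queue fault status and clear any reported faults.
 * Returns -EAGAIN if the wait descriptor was aborted by a time-out (ITE),
 * -EINVAL if an IQE points at our own descriptor, and 0 otherwise.
 */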
1264 static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
1265 {
1266         u32 fault;
1267         int head, tail;
1268         struct q_inval *qi = iommu->qi;
1269         int shift = qi_shift(iommu);
1270
1271         if (qi->desc_status[wait_index] == QI_ABORT)
1272                 return -EAGAIN;
1273
1274         fault = readl(iommu->reg + DMAR_FSTS_REG);
1275         if (fault & (DMA_FSTS_IQE | DMA_FSTS_ITE | DMA_FSTS_ICE))
1276                 qi_dump_fault(iommu, fault);
1277
1278         /*
1279          * If IQE happens, the head points to the descriptor associated
1280          * with the error. No new descriptors are fetched until the IQE
1281          * is cleared.
1282          */
1283         if (fault & DMA_FSTS_IQE) {
1284                 head = readl(iommu->reg + DMAR_IQH_REG);
1285                 if ((head >> shift) == index) {
1286                         struct qi_desc *desc = qi->desc + head;
1287
1288                         /*
1289                          * desc->qw2 and desc->qw3 are either reserved or
1290                          * used by software as private data. We won't print
1291                          * out these two qw's for security consideration.
1292                          */
1293                         memcpy(desc, qi->desc + (wait_index << shift),
1294                                1 << shift);
1295                         writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
1296                         pr_info("Invalidation Queue Error (IQE) cleared\n");
1297                         return -EINVAL;
1298                 }
1299         }
1300
1301         /*
1302          * If ITE happens, all pending wait_desc commands are aborted.
1303          * No new descriptors are fetched until the ITE is cleared.
1304          */
1305         if (fault & DMA_FSTS_ITE) {
1306                 head = readl(iommu->reg + DMAR_IQH_REG);
1307                 head = ((head >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1308                 head |= 1;
1309                 tail = readl(iommu->reg + DMAR_IQT_REG);
1310                 tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1311
1312                 writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
1313                 pr_info("Invalidation Time-out Error (ITE) cleared\n");
1314
1315                 do {
1316                         if (qi->desc_status[head] == QI_IN_USE)
1317                                 qi->desc_status[head] = QI_ABORT;
1318                         head = (head - 2 + QI_LENGTH) % QI_LENGTH;
1319                 } while (head != tail);
1320
1321                 if (qi->desc_status[wait_index] == QI_ABORT)
1322                         return -EAGAIN;
1323         }
1324
1325         if (fault & DMA_FSTS_ICE) {
1326                 writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
1327                 pr_info("Invalidation Completion Error (ICE) cleared\n");
1328         }
1329
1330         return 0;
1331 }
1332
1333 /*
1334  * Function to submit invalidation descriptors of all types to the queued
1335  * invalidation interface (QI). Multiple descriptors can be submitted at a
1336  * time; a wait descriptor is appended to each submission to ensure that
1337  * hardware has completed the invalidation before returning. Wait descriptors
1338  * can be part of the submission, but they will not be polled for completion.
1339  */
1340 int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
1341                    unsigned int count, unsigned long options)
1342 {
1343         struct q_inval *qi = iommu->qi;
1344         s64 devtlb_start_ktime = 0;
1345         s64 iotlb_start_ktime = 0;
1346         s64 iec_start_ktime = 0;
1347         struct qi_desc wait_desc;
1348         int wait_index, index;
1349         unsigned long flags;
1350         int offset, shift;
1351         int rc, i;
1352         u64 type;
1353
1354         if (!qi)
1355                 return 0;
1356
1357         type = desc->qw0 & GENMASK_ULL(3, 0);
1358
1359         if ((type == QI_IOTLB_TYPE || type == QI_EIOTLB_TYPE) &&
1360             dmar_latency_enabled(iommu, DMAR_LATENCY_INV_IOTLB))
1361                 iotlb_start_ktime = ktime_to_ns(ktime_get());
1362
1363         if ((type == QI_DIOTLB_TYPE || type == QI_DEIOTLB_TYPE) &&
1364             dmar_latency_enabled(iommu, DMAR_LATENCY_INV_DEVTLB))
1365                 devtlb_start_ktime = ktime_to_ns(ktime_get());
1366
1367         if (type == QI_IEC_TYPE &&
1368             dmar_latency_enabled(iommu, DMAR_LATENCY_INV_IEC))
1369                 iec_start_ktime = ktime_to_ns(ktime_get());
1370
1371 restart:
1372         rc = 0;
1373
1374         raw_spin_lock_irqsave(&qi->q_lock, flags);
1375         /*
1376          * Check if we have enough empty slots in the queue to submit,
1377          * the calculation is based on:
1378          * # of desc + 1 wait desc + 1 space between head and tail
1379          */
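        /*
         * For example, count == 1 requires free_cnt >= 3: the descriptor
         * itself, its wait descriptor, and one slot kept free so that the
         * head and tail never collide.
         */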
1380         while (qi->free_cnt < count + 2) {
1381                 raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1382                 cpu_relax();
1383                 raw_spin_lock_irqsave(&qi->q_lock, flags);
1384         }
1385
1386         index = qi->free_head;
1387         wait_index = (index + count) % QI_LENGTH;
1388         shift = qi_shift(iommu);
1389
1390         for (i = 0; i < count; i++) {
1391                 offset = ((index + i) % QI_LENGTH) << shift;
1392                 memcpy(qi->desc + offset, &desc[i], 1 << shift);
1393                 qi->desc_status[(index + i) % QI_LENGTH] = QI_IN_USE;
1394                 trace_qi_submit(iommu, desc[i].qw0, desc[i].qw1,
1395                                 desc[i].qw2, desc[i].qw3);
1396         }
1397         qi->desc_status[wait_index] = QI_IN_USE;
1398
1399         wait_desc.qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
1400                         QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
1401         if (options & QI_OPT_WAIT_DRAIN)
1402                 wait_desc.qw0 |= QI_IWD_PRQ_DRAIN;
1403         wait_desc.qw1 = virt_to_phys(&qi->desc_status[wait_index]);
1404         wait_desc.qw2 = 0;
1405         wait_desc.qw3 = 0;
1406
1407         offset = wait_index << shift;
1408         memcpy(qi->desc + offset, &wait_desc, 1 << shift);
1409
1410         qi->free_head = (qi->free_head + count + 1) % QI_LENGTH;
1411         qi->free_cnt -= count + 1;
1412
1413         /*
1414          * update the HW tail register indicating the presence of
1415          * new descriptors.
1416          */
1417         writel(qi->free_head << shift, iommu->reg + DMAR_IQT_REG);
1418
1419         while (qi->desc_status[wait_index] != QI_DONE) {
1420                 /*
1421                  * We will leave the interrupts disabled, to prevent interrupt
1422                  * context from queueing another cmd while a cmd is already submitted
1423                  * and waiting for completion on this CPU. This is to avoid
1424                  * a deadlock where the interrupt context can wait indefinitely
1425                  * for free slots in the queue.
1426                  */
1427                 rc = qi_check_fault(iommu, index, wait_index);
1428                 if (rc)
1429                         break;
1430
1431                 raw_spin_unlock(&qi->q_lock);
1432                 cpu_relax();
1433                 raw_spin_lock(&qi->q_lock);
1434         }
1435
1436         for (i = 0; i < count; i++)
1437                 qi->desc_status[(index + i) % QI_LENGTH] = QI_DONE;
1438
1439         reclaim_free_desc(qi);
1440         raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1441
1442         if (rc == -EAGAIN)
1443                 goto restart;
1444
1445         if (iotlb_start_ktime)
1446                 dmar_latency_update(iommu, DMAR_LATENCY_INV_IOTLB,
1447                                 ktime_to_ns(ktime_get()) - iotlb_start_ktime);
1448
1449         if (devtlb_start_ktime)
1450                 dmar_latency_update(iommu, DMAR_LATENCY_INV_DEVTLB,
1451                                 ktime_to_ns(ktime_get()) - devtlb_start_ktime);
1452
1453         if (iec_start_ktime)
1454                 dmar_latency_update(iommu, DMAR_LATENCY_INV_IEC,
1455                                 ktime_to_ns(ktime_get()) - iec_start_ktime);
1456
1457         return rc;
1458 }
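
/*
 * Illustrative sketch, not used by the driver: the ring accounting that
 * qi_submit_sync() performs above, restated in isolation. The helper name
 * is hypothetical; it only expresses the "count descriptors + 1 wait
 * descriptor + 1 gap slot between head and tail" requirement.
 */
static inline bool qi_batch_fits(unsigned int free_cnt, unsigned int count)
{
        /* count descriptors + 1 wait descriptor + 1 head/tail gap slot */
        return free_cnt >= count + 2;
}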
1459
1460 /*
1461  * Flush the global interrupt entry cache.
1462  */
1463 void qi_global_iec(struct intel_iommu *iommu)
1464 {
1465         struct qi_desc desc;
1466
1467         desc.qw0 = QI_IEC_TYPE;
1468         desc.qw1 = 0;
1469         desc.qw2 = 0;
1470         desc.qw3 = 0;
1471
1472         /* should never fail */
1473         qi_submit_sync(iommu, &desc, 1, 0);
1474 }
1475
1476 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
1477                       u64 type)
1478 {
1479         struct qi_desc desc;
1480
1481         desc.qw0 = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
1482                         | QI_CC_GRAN(type) | QI_CC_TYPE;
1483         desc.qw1 = 0;
1484         desc.qw2 = 0;
1485         desc.qw3 = 0;
1486
1487         qi_submit_sync(iommu, &desc, 1, 0);
1488 }
1489
1490 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
1491                     unsigned int size_order, u64 type)
1492 {
1493         u8 dw = 0, dr = 0;
1494
1495         struct qi_desc desc;
1496         int ih = 0;
1497
1498         if (cap_write_drain(iommu->cap))
1499                 dw = 1;
1500
1501         if (cap_read_drain(iommu->cap))
1502                 dr = 1;
1503
1504         desc.qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
1505                 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
1506         desc.qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
1507                 | QI_IOTLB_AM(size_order);
1508         desc.qw2 = 0;
1509         desc.qw3 = 0;
1510
1511         qi_submit_sync(iommu, &desc, 1, 0);
1512 }
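
/*
 * Worked example (illustration only): qi_flush_iotlb() above places
 * size_order directly in the AM field, so a page-selective flush covers
 * 2^size_order pages and expects @addr aligned to that range, e.g.
 * size_order == 3 covers 8 x 4KB == 32KB from a 32KB-aligned address.
 * The helper below is a hypothetical restatement of that arithmetic.
 */
static inline u64 qi_iotlb_range_bytes(unsigned int size_order)
{
        /* 2^size_order pages of VTD_PAGE_SIZE (4KB) each */
        return (u64)VTD_PAGE_SIZE << size_order;
}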
1513
1514 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1515                         u16 qdep, u64 addr, unsigned mask)
1516 {
1517         struct qi_desc desc;
1518
1519         if (mask) {
1520                 addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
1521                 desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
1522         } else
1523                 desc.qw1 = QI_DEV_IOTLB_ADDR(addr);
1524
1525         if (qdep >= QI_DEV_IOTLB_MAX_INVS)
1526                 qdep = 0;
1527
1528         desc.qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
1529                    QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid);
1530         desc.qw2 = 0;
1531         desc.qw3 = 0;
1532
1533         qi_submit_sync(iommu, &desc, 1, 0);
1534 }
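
/*
 * Worked example (illustration only) of the size encoding used by
 * qi_flush_dev_iotlb() above: "addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1"
 * sets every address bit below bit (VTD_PAGE_SHIFT + mask - 1), so for an
 * aligned @addr that bit is the least significant zero bit the device uses
 * to decode the range. With VTD_PAGE_SHIFT == 12 and mask == 2, bits 0-12
 * become 1 and bit 13 stays 0, which together with the S bit encodes a
 * 16KB (4-page) invalidation.
 */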
1535
1536 /* PASID-based IOTLB invalidation */
1537 void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
1538                      unsigned long npages, bool ih)
1539 {
1540         struct qi_desc desc = {.qw2 = 0, .qw3 = 0};
1541
1542         /*
1543          * npages == -1 requests a PASID-selective invalidation; a positive
1544          * value requests a Page-selective-within-PASID invalidation.
1545          * 0 is not a valid input.
1546          */
1547         if (WARN_ON(!npages)) {
1548                 pr_err("Invalid input npages = %ld\n", npages);
1549                 return;
1550         }
1551
1552         if (npages == -1) {
1553                 desc.qw0 = QI_EIOTLB_PASID(pasid) |
1554                                 QI_EIOTLB_DID(did) |
1555                                 QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
1556                                 QI_EIOTLB_TYPE;
1557                 desc.qw1 = 0;
1558         } else {
1559                 int mask = ilog2(__roundup_pow_of_two(npages));
1560                 unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask));
1561
1562                 if (WARN_ON_ONCE(!IS_ALIGNED(addr, align)))
1563                         addr = ALIGN_DOWN(addr, align);
1564
1565                 desc.qw0 = QI_EIOTLB_PASID(pasid) |
1566                                 QI_EIOTLB_DID(did) |
1567                                 QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
1568                                 QI_EIOTLB_TYPE;
1569                 desc.qw1 = QI_EIOTLB_ADDR(addr) |
1570                                 QI_EIOTLB_IH(ih) |
1571                                 QI_EIOTLB_AM(mask);
1572         }
1573
1574         qi_submit_sync(iommu, &desc, 1, 0);
1575 }
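
/*
 * Minimal sketch, not used by the driver: how qi_flush_piotlb() above
 * derives the AM field for a page-selective flush. npages is rounded up
 * to a power of two and its log2 becomes the address mask, so @addr must
 * be aligned to the rounded range. The helper name is hypothetical.
 */
static inline int qi_piotlb_am(unsigned long npages)
{
        /* e.g. npages == 3 rounds up to 4, giving AM == 2 (a 16KB range) */
        return ilog2(__roundup_pow_of_two(npages));
}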
1576
1577 /* PASID-based device IOTLB Invalidate */
1578 void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1579                               u32 pasid,  u16 qdep, u64 addr, unsigned int size_order)
1580 {
1581         unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1);
1582         struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
1583
1584         desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) |
1585                 QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE |
1586                 QI_DEV_IOTLB_PFSID(pfsid);
1587
1588         /*
1589          * If the S bit is 0, only a single page is flushed. If the S bit is
1590          * set, the least significant zero bit of the address indicates the
1591          * invalidation range (VT-d spec 6.5.2.6), e.g. address bit 12 clear
1592          * indicates 8KB, bit 13 clear indicates 16KB.
1593          * size_order == 0 means a single 4KB page.
1594          * Max Invs Pending (MIP) is set to 0 for now until we have DIT in
1595          * ECAP.
1596          */
1597         if (!IS_ALIGNED(addr, VTD_PAGE_SIZE << size_order))
1598                 pr_warn_ratelimited("Invalidate non-aligned address %llx, order %d\n",
1599                                     addr, size_order);
1600
1601         /* Take page address */
1602         desc.qw1 = QI_DEV_EIOTLB_ADDR(addr);
1603
1604         if (size_order) {
1605                 /*
1606                  * Any 0 bits in the address below size_order could be taken
1607                  * as the least significant zero bit; set them all to 1 so the
1608                  * indicated range is not smaller than desired.
1609                  */
1610                 desc.qw1 |= GENMASK_ULL(size_order + VTD_PAGE_SHIFT - 1,
1611                                         VTD_PAGE_SHIFT);
1612                 /* Clear size_order bit to indicate size */
1613                 desc.qw1 &= ~mask;
1614                 /* Set the S bit to indicate flushing more than 1 page */
1615                 desc.qw1 |= QI_DEV_EIOTLB_SIZE;
1616         }
1617
1618         qi_submit_sync(iommu, &desc, 1, 0);
1619 }
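
/*
 * Worked example (illustration only) of the encoding above, assuming
 * VTD_PAGE_SHIFT == 12 and size_order == 2: GENMASK_ULL(13, 12) sets
 * bits 12-13, clearing "mask" (bit 13) then leaves bit 12 == 1 and
 * bit 13 == 0, so the least significant zero bit is bit 13 and, with
 * the S bit set, the device decodes a 16KB (4-page) range.
 */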
1620
1621 void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did,
1622                           u64 granu, u32 pasid)
1623 {
1624         struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
1625
1626         desc.qw0 = QI_PC_PASID(pasid) | QI_PC_DID(did) |
1627                         QI_PC_GRAN(granu) | QI_PC_TYPE;
1628         qi_submit_sync(iommu, &desc, 1, 0);
1629 }
1630
1631 /*
1632  * Disable Queued Invalidation interface.
1633  */
1634 void dmar_disable_qi(struct intel_iommu *iommu)
1635 {
1636         unsigned long flags;
1637         u32 sts;
1638         cycles_t start_time = get_cycles();
1639
1640         if (!ecap_qis(iommu->ecap))
1641                 return;
1642
1643         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1644
1645         sts = readl(iommu->reg + DMAR_GSTS_REG);
1646         if (!(sts & DMA_GSTS_QIES))
1647                 goto end;
1648
1649         /*
1650          * Give the HW a chance to complete the pending invalidation requests.
1651          */
1652         while ((readl(iommu->reg + DMAR_IQT_REG) !=
1653                 readl(iommu->reg + DMAR_IQH_REG)) &&
1654                 (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
1655                 cpu_relax();
1656
1657         iommu->gcmd &= ~DMA_GCMD_QIE;
1658         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1659
1660         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
1661                       !(sts & DMA_GSTS_QIES), sts);
1662 end:
1663         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1664 }
1665
1666 /*
1667  * Enable queued invalidation.
1668  */
1669 static void __dmar_enable_qi(struct intel_iommu *iommu)
1670 {
1671         u32 sts;
1672         unsigned long flags;
1673         struct q_inval *qi = iommu->qi;
1674         u64 val = virt_to_phys(qi->desc);
1675
1676         qi->free_head = qi->free_tail = 0;
1677         qi->free_cnt = QI_LENGTH;
1678
1679         /*
1680          * Set DW=1 and QS=1 in IQA_REG when Scalable Mode capability
1681          * is present.
1682          */
1683         if (ecap_smts(iommu->ecap))
1684                 val |= (1 << 11) | 1;
1685
1686         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1687
1688         /* write zero to the tail reg */
1689         writel(0, iommu->reg + DMAR_IQT_REG);
1690
1691         dmar_writeq(iommu->reg + DMAR_IQA_REG, val);
1692
1693         iommu->gcmd |= DMA_GCMD_QIE;
1694         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1695
1696         /* Make sure hardware completes it */
1697         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
1698
1699         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1700 }
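
/*
 * Worked example (illustration only) of the IQA_REG value composed in
 * __dmar_enable_qi() above: with a 4KB-aligned descriptor ring at
 * physical address 0x12345000 and scalable mode present, the value
 * written is 0x12345000 | (1 << 11) | 1 == 0x12345801, i.e. DW == 1
 * (256-bit descriptors) and QS == 1 in the low bits of the register.
 */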
1701
1702 /*
1703  * Enable Queued Invalidation interface. This is a must to support
1704  * interrupt-remapping. Also used by DMA-remapping, which replaces
1705  * register based IOTLB invalidation.
1706  */
1707 int dmar_enable_qi(struct intel_iommu *iommu)
1708 {
1709         struct q_inval *qi;
1710         struct page *desc_page;
1711
1712         if (!ecap_qis(iommu->ecap))
1713                 return -ENOENT;
1714
1715         /*
1716          * Queued invalidation is already set up and enabled.
1717          */
1718         if (iommu->qi)
1719                 return 0;
1720
1721         iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1722         if (!iommu->qi)
1723                 return -ENOMEM;
1724
1725         qi = iommu->qi;
1726
1727         /*
1728          * Need two pages to accommodate 256 descriptors of 256 bits each
1729          * if the remapping hardware supports scalable mode translation.
1730          */
1731         desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
1732                                      !!ecap_smts(iommu->ecap));
1733         if (!desc_page) {
1734                 kfree(qi);
1735                 iommu->qi = NULL;
1736                 return -ENOMEM;
1737         }
1738
1739         qi->desc = page_address(desc_page);
1740
1741         qi->desc_status = kcalloc(QI_LENGTH, sizeof(int), GFP_ATOMIC);
1742         if (!qi->desc_status) {
1743                 free_page((unsigned long) qi->desc);
1744                 kfree(qi);
1745                 iommu->qi = NULL;
1746                 return -ENOMEM;
1747         }
1748
1749         raw_spin_lock_init(&qi->q_lock);
1750
1751         __dmar_enable_qi(iommu);
1752
1753         return 0;
1754 }
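
/*
 * Minimal sketch, not used by the driver, of the sizing behind the page
 * order chosen in dmar_enable_qi() above: QI_LENGTH descriptors of
 * 16 bytes (legacy 128-bit) fit in one 4KB page, while 32-byte (256-bit,
 * scalable mode) descriptors need two pages. The helper is hypothetical.
 */
static inline size_t qi_ring_bytes(bool scalable_mode)
{
        /* 256 descriptors x 16 or 32 bytes == 4KB or 8KB */
        return QI_LENGTH * (scalable_mode ? 32 : 16);
}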
1755
1756 /* iommu interrupt handling. Most of it is MSI-like. */
1757
1758 enum faulttype {
1759         DMA_REMAP,
1760         INTR_REMAP,
1761         UNKNOWN,
1762 };
1763
1764 static const char *dma_remap_fault_reasons[] =
1765 {
1766         "Software",
1767         "Present bit in root entry is clear",
1768         "Present bit in context entry is clear",
1769         "Invalid context entry",
1770         "Access beyond MGAW",
1771         "PTE Write access is not set",
1772         "PTE Read access is not set",
1773         "Next page table ptr is invalid",
1774         "Root table address invalid",
1775         "Context table ptr is invalid",
1776         "non-zero reserved fields in RTP",
1777         "non-zero reserved fields in CTP",
1778         "non-zero reserved fields in PTE",
1779         "PCE for translation request specifies blocking",
1780 };
1781
1782 static const char * const dma_remap_sm_fault_reasons[] = {
1783         "SM: Invalid Root Table Address",
1784         "SM: TTM 0 for request with PASID",
1785         "SM: TTM 0 for page group request",
1786         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x33-0x37 */
1787         "SM: Error attempting to access Root Entry",
1788         "SM: Present bit in Root Entry is clear",
1789         "SM: Non-zero reserved field set in Root Entry",
1790         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x3B-0x3F */
1791         "SM: Error attempting to access Context Entry",
1792         "SM: Present bit in Context Entry is clear",
1793         "SM: Non-zero reserved field set in the Context Entry",
1794         "SM: Invalid Context Entry",
1795         "SM: DTE field in Context Entry is clear",
1796         "SM: PASID Enable field in Context Entry is clear",
1797         "SM: PASID is larger than the max in Context Entry",
1798         "SM: PRE field in Context-Entry is clear",
1799         "SM: RID_PASID field error in Context-Entry",
1800         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x49-0x4F */
1801         "SM: Error attempting to access the PASID Directory Entry",
1802         "SM: Present bit in Directory Entry is clear",
1803         "SM: Non-zero reserved field set in PASID Directory Entry",
1804         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x53-0x57 */
1805         "SM: Error attempting to access PASID Table Entry",
1806         "SM: Present bit in PASID Table Entry is clear",
1807         "SM: Non-zero reserved field set in PASID Table Entry",
1808         "SM: Invalid Scalable-Mode PASID Table Entry",
1809         "SM: ERE field is clear in PASID Table Entry",
1810         "SM: SRE field is clear in PASID Table Entry",
1811         "Unknown", "Unknown",/* 0x5E-0x5F */
1812         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x60-0x67 */
1813         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x68-0x6F */
1814         "SM: Error attempting to access first-level paging entry",
1815         "SM: Present bit in first-level paging entry is clear",
1816         "SM: Non-zero reserved field set in first-level paging entry",
1817         "SM: Error attempting to access FL-PML4 entry",
1818         "SM: First-level entry address beyond MGAW in Nested translation",
1819         "SM: Read permission error in FL-PML4 entry in Nested translation",
1820         "SM: Read permission error in first-level paging entry in Nested translation",
1821         "SM: Write permission error in first-level paging entry in Nested translation",
1822         "SM: Error attempting to access second-level paging entry",
1823         "SM: Read/Write permission error in second-level paging entry",
1824         "SM: Non-zero reserved field set in second-level paging entry",
1825         "SM: Invalid second-level page table pointer",
1826         "SM: A/D bit update needed in second-level entry when set up in no snoop",
1827         "Unknown", "Unknown", "Unknown", /* 0x7D-0x7F */
1828         "SM: Address in first-level translation is not canonical",
1829         "SM: U/S set 0 for first-level translation with user privilege",
1830         "SM: No execute permission for request with PASID and ER=1",
1831         "SM: Address beyond the DMA hardware max",
1832         "SM: Second-level entry address beyond the max",
1833         "SM: No write permission for Write/AtomicOp request",
1834         "SM: No read permission for Read/AtomicOp request",
1835         "SM: Invalid address-interrupt address",
1836         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x88-0x8F */
1837         "SM: A/D bit update needed in first-level entry when set up in no snoop",
1838 };
1839
1840 static const char *irq_remap_fault_reasons[] =
1841 {
1842         "Detected reserved fields in the decoded interrupt-remapped request",
1843         "Interrupt index exceeded the interrupt-remapping table size",
1844         "Present field in the IRTE entry is clear",
1845         "Error accessing interrupt-remapping table pointed to by IRTA_REG",
1846         "Detected reserved fields in the IRTE entry",
1847         "Blocked a compatibility format interrupt request",
1848         "Blocked an interrupt request due to source-id verification failure",
1849 };
1850
1851 static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1852 {
1853         if (fault_reason >= 0x20 && (fault_reason - 0x20 <
1854                                         ARRAY_SIZE(irq_remap_fault_reasons))) {
1855                 *fault_type = INTR_REMAP;
1856                 return irq_remap_fault_reasons[fault_reason - 0x20];
1857         } else if (fault_reason >= 0x30 && (fault_reason - 0x30 <
1858                         ARRAY_SIZE(dma_remap_sm_fault_reasons))) {
1859                 *fault_type = DMA_REMAP;
1860                 return dma_remap_sm_fault_reasons[fault_reason - 0x30];
1861         } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1862                 *fault_type = DMA_REMAP;
1863                 return dma_remap_fault_reasons[fault_reason];
1864         } else {
1865                 *fault_type = UNKNOWN;
1866                 return "Unknown";
1867         }
1868 }
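
/*
 * Example (illustration only) of how raw fault reason codes map to the
 * tables above: reason 0x25 decodes as INTR_REMAP and indexes
 * irq_remap_fault_reasons[5] ("Blocked a compatibility format interrupt
 * request"), while reason 0x30 decodes as DMA_REMAP and indexes
 * dma_remap_sm_fault_reasons[0] ("SM: Invalid Root Table Address").
 */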
1869
1870
1871 static inline int dmar_msi_reg(struct intel_iommu *iommu, int irq)
1872 {
1873         if (iommu->irq == irq)
1874                 return DMAR_FECTL_REG;
1875         else if (iommu->pr_irq == irq)
1876                 return DMAR_PECTL_REG;
1877         else
1878                 BUG();
1879 }
1880
1881 void dmar_msi_unmask(struct irq_data *data)
1882 {
1883         struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1884         int reg = dmar_msi_reg(iommu, data->irq);
1885         unsigned long flag;
1886
1887         /* unmask it */
1888         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1889         writel(0, iommu->reg + reg);
1890         /* Read back the register to flush the posted write */
1891         readl(iommu->reg + reg);
1892         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1893 }
1894
1895 void dmar_msi_mask(struct irq_data *data)
1896 {
1897         struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1898         int reg = dmar_msi_reg(iommu, data->irq);
1899         unsigned long flag;
1900
1901         /* mask it */
1902         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1903         writel(DMA_FECTL_IM, iommu->reg + reg);
1904         /* Read back the register to flush the posted write */
1905         readl(iommu->reg + reg);
1906         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1907 }
1908
1909 void dmar_msi_write(int irq, struct msi_msg *msg)
1910 {
1911         struct intel_iommu *iommu = irq_get_handler_data(irq);
1912         int reg = dmar_msi_reg(iommu, irq);
1913         unsigned long flag;
1914
1915         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1916         writel(msg->data, iommu->reg + reg + 4);
1917         writel(msg->address_lo, iommu->reg + reg + 8);
1918         writel(msg->address_hi, iommu->reg + reg + 12);
1919         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1920 }
1921
1922 void dmar_msi_read(int irq, struct msi_msg *msg)
1923 {
1924         struct intel_iommu *iommu = irq_get_handler_data(irq);
1925         int reg = dmar_msi_reg(iommu, irq);
1926         unsigned long flag;
1927
1928         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1929         msg->data = readl(iommu->reg + reg + 4);
1930         msg->address_lo = readl(iommu->reg + reg + 8);
1931         msg->address_hi = readl(iommu->reg + reg + 12);
1932         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1933 }
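
/*
 * Layout note, derived from the accessors above: the fault-event MSI
 * registers form a small block starting at dmar_msi_reg(), with the
 * control register at offset 0, the data register at +4, the low
 * address at +8 and the upper address at +12, which is why the
 * read/write helpers use those fixed offsets.
 */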
1934
1935 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1936                 u8 fault_reason, u32 pasid, u16 source_id,
1937                 unsigned long long addr)
1938 {
1939         const char *reason;
1940         int fault_type;
1941
1942         reason = dmar_get_fault_reason(fault_reason, &fault_type);
1943
1944         if (fault_type == INTR_REMAP) {
1945                 pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index 0x%llx [fault reason 0x%02x] %s\n",
1946                        source_id >> 8, PCI_SLOT(source_id & 0xFF),
1947                        PCI_FUNC(source_id & 0xFF), addr >> 48,
1948                        fault_reason, reason);
1949
1950                 return 0;
1951         }
1952
1953         if (pasid == INVALID_IOASID)
1954                 pr_err("[%s NO_PASID] Request device [%02x:%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n",
1955                        type ? "DMA Read" : "DMA Write",
1956                        source_id >> 8, PCI_SLOT(source_id & 0xFF),
1957                        PCI_FUNC(source_id & 0xFF), addr,
1958                        fault_reason, reason);
1959         else
1960                 pr_err("[%s PASID 0x%x] Request device [%02x:%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n",
1961                        type ? "DMA Read" : "DMA Write", pasid,
1962                        source_id >> 8, PCI_SLOT(source_id & 0xFF),
1963                        PCI_FUNC(source_id & 0xFF), addr,
1964                        fault_reason, reason);
1965
1966         dmar_fault_dump_ptes(iommu, source_id, addr, pasid);
1967
1968         return 0;
1969 }
1970
1971 #define PRIMARY_FAULT_REG_LEN (16)
1972 irqreturn_t dmar_fault(int irq, void *dev_id)
1973 {
1974         struct intel_iommu *iommu = dev_id;
1975         int reg, fault_index;
1976         u32 fault_status;
1977         unsigned long flag;
1978         static DEFINE_RATELIMIT_STATE(rs,
1979                                       DEFAULT_RATELIMIT_INTERVAL,
1980                                       DEFAULT_RATELIMIT_BURST);
1981
1982         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1983         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1984         if (fault_status && __ratelimit(&rs))
1985                 pr_err("DRHD: handling fault status reg %x\n", fault_status);
1986
1987         /* TBD: ignore advanced fault log currently */
1988         if (!(fault_status & DMA_FSTS_PPF))
1989                 goto unlock_exit;
1990
1991         fault_index = dma_fsts_fault_record_index(fault_status);
1992         reg = cap_fault_reg_offset(iommu->cap);
1993         while (1) {
1994                 /* Disable printing, simply clear the fault when ratelimited */
1995                 bool ratelimited = !__ratelimit(&rs);
1996                 u8 fault_reason;
1997                 u16 source_id;
1998                 u64 guest_addr;
1999                 u32 pasid;
2000                 int type;
2001                 u32 data;
2002                 bool pasid_present;
2003
2004                 /* highest 32 bits */
2005                 data = readl(iommu->reg + reg +
2006                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
2007                 if (!(data & DMA_FRCD_F))
2008                         break;
2009
2010                 if (!ratelimited) {
2011                         fault_reason = dma_frcd_fault_reason(data);
2012                         type = dma_frcd_type(data);
2013
2014                         pasid = dma_frcd_pasid_value(data);
2015                         data = readl(iommu->reg + reg +
2016                                      fault_index * PRIMARY_FAULT_REG_LEN + 8);
2017                         source_id = dma_frcd_source_id(data);
2018
2019                         pasid_present = dma_frcd_pasid_present(data);
2020                         guest_addr = dmar_readq(iommu->reg + reg +
2021                                         fault_index * PRIMARY_FAULT_REG_LEN);
2022                         guest_addr = dma_frcd_page_addr(guest_addr);
2023                 }
2024
2025                 /* clear the fault */
2026                 writel(DMA_FRCD_F, iommu->reg + reg +
2027                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
2028
2029                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
2030
2031                 if (!ratelimited)
2032                         /* Use INVALID_IOASID (-1) when no PASID is present */
2033                         dmar_fault_do_one(iommu, type, fault_reason,
2034                                           pasid_present ? pasid : INVALID_IOASID,
2035                                           source_id, guest_addr);
2036
2037                 fault_index++;
2038                 if (fault_index >= cap_num_fault_regs(iommu->cap))
2039                         fault_index = 0;
2040                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
2041         }
2042
2043         writel(DMA_FSTS_PFO | DMA_FSTS_PPF | DMA_FSTS_PRO,
2044                iommu->reg + DMAR_FSTS_REG);
2045
2046 unlock_exit:
2047         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
2048         return IRQ_HANDLED;
2049 }
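
/*
 * Layout note, derived from the loop above: each primary fault record is
 * PRIMARY_FAULT_REG_LEN (16) bytes. The faulting page address occupies
 * the first 8 bytes, the source-id and PASID-present bit live in the
 * dword at +8, and the fault reason, type, PASID value and F (fault) bit
 * live in the dword at +12, which is also written back to clear the
 * record.
 */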
2050
2051 int dmar_set_interrupt(struct intel_iommu *iommu)
2052 {
2053         int irq, ret;
2054
2055         /*
2056          * Check if the fault interrupt is already initialized.
2057          */
2058         if (iommu->irq)
2059                 return 0;
2060
2061         irq = dmar_alloc_hwirq(iommu->seq_id, iommu->node, iommu);
2062         if (irq > 0) {
2063                 iommu->irq = irq;
2064         } else {
2065                 pr_err("No free IRQ vectors\n");
2066                 return -EINVAL;
2067         }
2068
2069         ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
2070         if (ret)
2071                 pr_err("Can't request irq\n");
2072         return ret;
2073 }
2074
2075 int __init enable_drhd_fault_handling(void)
2076 {
2077         struct dmar_drhd_unit *drhd;
2078         struct intel_iommu *iommu;
2079
2080         /*
2081          * Enable fault control interrupt.
2082          */
2083         for_each_iommu(iommu, drhd) {
2084                 u32 fault_status;
2085                 int ret = dmar_set_interrupt(iommu);
2086
2087                 if (ret) {
2088                         pr_err("DRHD %Lx: failed to enable fault interrupt, ret %d\n",
2089                                (unsigned long long)drhd->reg_base_addr, ret);
2090                         return -1;
2091                 }
2092
2093                 /*
2094                  * Clear any previous faults.
2095                  */
2096                 dmar_fault(iommu->irq, iommu);
2097                 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
2098                 writel(fault_status, iommu->reg + DMAR_FSTS_REG);
2099         }
2100
2101         return 0;
2102 }
2103
2104 /*
2105  * Re-enable Queued Invalidation interface.
2106  */
2107 int dmar_reenable_qi(struct intel_iommu *iommu)
2108 {
2109         if (!ecap_qis(iommu->ecap))
2110                 return -ENOENT;
2111
2112         if (!iommu->qi)
2113                 return -ENOENT;
2114
2115         /*
2116          * First disable queued invalidation.
2117          */
2118         dmar_disable_qi(iommu);
2119         /*
2120          * Then enable queued invalidation again. Since there is no pending
2121          * invalidation requests now, it's safe to re-enable queued
2122          * invalidation.
2123          */
2124         __dmar_enable_qi(iommu);
2125
2126         return 0;
2127 }
2128
2129 /*
2130  * Check interrupt remapping support in DMAR table description.
2131  */
2132 int __init dmar_ir_support(void)
2133 {
2134         struct acpi_table_dmar *dmar;
2135         dmar = (struct acpi_table_dmar *)dmar_tbl;
2136         if (!dmar)
2137                 return 0;
2138         return dmar->flags & 0x1;
2139 }
2140
2141 /* Check whether DMAR units are in use */
2142 static inline bool dmar_in_use(void)
2143 {
2144         return irq_remapping_enabled || intel_iommu_enabled;
2145 }
2146
2147 static int __init dmar_free_unused_resources(void)
2148 {
2149         struct dmar_drhd_unit *dmaru, *dmaru_n;
2150
2151         if (dmar_in_use())
2152                 return 0;
2153
2154         if (dmar_dev_scope_status != 1 && !list_empty(&dmar_drhd_units))
2155                 bus_unregister_notifier(&pci_bus_type, &dmar_pci_bus_nb);
2156
2157         down_write(&dmar_global_lock);
2158         list_for_each_entry_safe(dmaru, dmaru_n, &dmar_drhd_units, list) {
2159                 list_del(&dmaru->list);
2160                 dmar_free_drhd(dmaru);
2161         }
2162         up_write(&dmar_global_lock);
2163
2164         return 0;
2165 }
2166
2167 late_initcall(dmar_free_unused_resources);
2168 IOMMU_INIT_POST(detect_intel_iommu);
2169
2170 /*
2171  * DMAR Hotplug Support
2172  * For more details, please refer to Intel(R) Virtualization Technology
2173  * for Directed I/O Architecture Specification, Rev 2.2, Section 8.8
2174  * "Remapping Hardware Unit Hot Plug".
2175  */
2176 static guid_t dmar_hp_guid =
2177         GUID_INIT(0xD8C1A3A6, 0xBE9B, 0x4C9B,
2178                   0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF);
2179
2180 /*
2181  * Currently there's only one revision and BIOS will not check the revision id,
2182  * so use 0 for safety.
2183  */
2184 #define DMAR_DSM_REV_ID                 0
2185 #define DMAR_DSM_FUNC_DRHD              1
2186 #define DMAR_DSM_FUNC_ATSR              2
2187 #define DMAR_DSM_FUNC_RHSA              3
2188 #define DMAR_DSM_FUNC_SATC              4
2189
2190 static inline bool dmar_detect_dsm(acpi_handle handle, int func)
2191 {
2192         return acpi_check_dsm(handle, &dmar_hp_guid, DMAR_DSM_REV_ID, 1 << func);
2193 }
2194
2195 static int dmar_walk_dsm_resource(acpi_handle handle, int func,
2196                                   dmar_res_handler_t handler, void *arg)
2197 {
2198         int ret = -ENODEV;
2199         union acpi_object *obj;
2200         struct acpi_dmar_header *start;
2201         struct dmar_res_callback callback;
2202         static int res_type[] = {
2203                 [DMAR_DSM_FUNC_DRHD] = ACPI_DMAR_TYPE_HARDWARE_UNIT,
2204                 [DMAR_DSM_FUNC_ATSR] = ACPI_DMAR_TYPE_ROOT_ATS,
2205                 [DMAR_DSM_FUNC_RHSA] = ACPI_DMAR_TYPE_HARDWARE_AFFINITY,
2206                 [DMAR_DSM_FUNC_SATC] = ACPI_DMAR_TYPE_SATC,
2207         };
2208
2209         if (!dmar_detect_dsm(handle, func))
2210                 return 0;
2211
2212         obj = acpi_evaluate_dsm_typed(handle, &dmar_hp_guid, DMAR_DSM_REV_ID,
2213                                       func, NULL, ACPI_TYPE_BUFFER);
2214         if (!obj)
2215                 return -ENODEV;
2216
2217         memset(&callback, 0, sizeof(callback));
2218         callback.cb[res_type[func]] = handler;
2219         callback.arg[res_type[func]] = arg;
2220         start = (struct acpi_dmar_header *)obj->buffer.pointer;
2221         ret = dmar_walk_remapping_entries(start, obj->buffer.length, &callback);
2222
2223         ACPI_FREE(obj);
2224
2225         return ret;
2226 }
2227
2228 static int dmar_hp_add_drhd(struct acpi_dmar_header *header, void *arg)
2229 {
2230         int ret;
2231         struct dmar_drhd_unit *dmaru;
2232
2233         dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2234         if (!dmaru)
2235                 return -ENODEV;
2236
2237         ret = dmar_ir_hotplug(dmaru, true);
2238         if (ret == 0)
2239                 ret = dmar_iommu_hotplug(dmaru, true);
2240
2241         return ret;
2242 }
2243
2244 static int dmar_hp_remove_drhd(struct acpi_dmar_header *header, void *arg)
2245 {
2246         int i, ret;
2247         struct device *dev;
2248         struct dmar_drhd_unit *dmaru;
2249
2250         dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2251         if (!dmaru)
2252                 return 0;
2253
2254         /*
2255          * All PCI devices managed by this unit should have been destroyed.
2256          */
2257         if (!dmaru->include_all && dmaru->devices && dmaru->devices_cnt) {
2258                 for_each_active_dev_scope(dmaru->devices,
2259                                           dmaru->devices_cnt, i, dev)
2260                         return -EBUSY;
2261         }
2262
2263         ret = dmar_ir_hotplug(dmaru, false);
2264         if (ret == 0)
2265                 ret = dmar_iommu_hotplug(dmaru, false);
2266
2267         return ret;
2268 }
2269
2270 static int dmar_hp_release_drhd(struct acpi_dmar_header *header, void *arg)
2271 {
2272         struct dmar_drhd_unit *dmaru;
2273
2274         dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2275         if (dmaru) {
2276                 list_del_rcu(&dmaru->list);
2277                 synchronize_rcu();
2278                 dmar_free_drhd(dmaru);
2279         }
2280
2281         return 0;
2282 }
2283
2284 static int dmar_hotplug_insert(acpi_handle handle)
2285 {
2286         int ret;
2287         int drhd_count = 0;
2288
2289         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2290                                      &dmar_validate_one_drhd, (void *)1);
2291         if (ret)
2292                 goto out;
2293
2294         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2295                                      &dmar_parse_one_drhd, (void *)&drhd_count);
2296         if (ret == 0 && drhd_count == 0) {
2297                 pr_warn(FW_BUG "No DRHD structures in buffer returned by _DSM method\n");
2298                 goto out;
2299         } else if (ret) {
2300                 goto release_drhd;
2301         }
2302
2303         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_RHSA,
2304                                      &dmar_parse_one_rhsa, NULL);
2305         if (ret)
2306                 goto release_drhd;
2307
2308         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2309                                      &dmar_parse_one_atsr, NULL);
2310         if (ret)
2311                 goto release_atsr;
2312
2313         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2314                                      &dmar_hp_add_drhd, NULL);
2315         if (!ret)
2316                 return 0;
2317
2318         dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2319                                &dmar_hp_remove_drhd, NULL);
2320 release_atsr:
2321         dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2322                                &dmar_release_one_atsr, NULL);
2323 release_drhd:
2324         dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2325                                &dmar_hp_release_drhd, NULL);
2326 out:
2327         return ret;
2328 }
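
/*
 * Flow note, summarizing the function above: hot-add walks the _DSM
 * resources in the order validate DRHD -> parse DRHD -> RHSA -> ATSR ->
 * activate DRHD units, and on failure unwinds in reverse, deactivating
 * the units already added, releasing ATSR state and finally releasing
 * the DRHD units.
 */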
2329
2330 static int dmar_hotplug_remove(acpi_handle handle)
2331 {
2332         int ret;
2333
2334         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2335                                      &dmar_check_one_atsr, NULL);
2336         if (ret)
2337                 return ret;
2338
2339         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2340                                      &dmar_hp_remove_drhd, NULL);
2341         if (ret == 0) {
2342                 WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2343                                                &dmar_release_one_atsr, NULL));
2344                 WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2345                                                &dmar_hp_release_drhd, NULL));
2346         } else {
2347                 dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2348                                        &dmar_hp_add_drhd, NULL);
2349         }
2350
2351         return ret;
2352 }
2353
2354 static acpi_status dmar_get_dsm_handle(acpi_handle handle, u32 lvl,
2355                                        void *context, void **retval)
2356 {
2357         acpi_handle *phdl = retval;
2358
2359         if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2360                 *phdl = handle;
2361                 return AE_CTRL_TERMINATE;
2362         }
2363
2364         return AE_OK;
2365 }
2366
2367 static int dmar_device_hotplug(acpi_handle handle, bool insert)
2368 {
2369         int ret;
2370         acpi_handle tmp = NULL;
2371         acpi_status status;
2372
2373         if (!dmar_in_use())
2374                 return 0;
2375
2376         if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2377                 tmp = handle;
2378         } else {
2379                 status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle,
2380                                              ACPI_UINT32_MAX,
2381                                              dmar_get_dsm_handle,
2382                                              NULL, NULL, &tmp);
2383                 if (ACPI_FAILURE(status)) {
2384                         pr_warn("Failed to locate _DSM method.\n");
2385                         return -ENXIO;
2386                 }
2387         }
2388         if (tmp == NULL)
2389                 return 0;
2390
2391         down_write(&dmar_global_lock);
2392         if (insert)
2393                 ret = dmar_hotplug_insert(tmp);
2394         else
2395                 ret = dmar_hotplug_remove(tmp);
2396         up_write(&dmar_global_lock);
2397
2398         return ret;
2399 }
2400
2401 int dmar_device_add(acpi_handle handle)
2402 {
2403         return dmar_device_hotplug(handle, true);
2404 }
2405
2406 int dmar_device_remove(acpi_handle handle)
2407 {
2408         return dmar_device_hotplug(handle, false);
2409 }
2410
2411 /*
2412  * dmar_platform_optin - Is %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in DMAR table
2413  *
2414  * Returns true if the platform has %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in
2415  * the ACPI DMAR table. This means that the platform boot firmware has made
2416  * sure no device can issue DMA outside of RMRR regions.
2417  */
2418 bool dmar_platform_optin(void)
2419 {
2420         struct acpi_table_dmar *dmar;
2421         acpi_status status;
2422         bool ret;
2423
2424         status = acpi_get_table(ACPI_SIG_DMAR, 0,
2425                                 (struct acpi_table_header **)&dmar);
2426         if (ACPI_FAILURE(status))
2427                 return false;
2428
2429         ret = !!(dmar->flags & DMAR_PLATFORM_OPT_IN);
2430         acpi_put_table((struct acpi_table_header *)dmar);
2431
2432         return ret;
2433 }
2434 EXPORT_SYMBOL_GPL(dmar_platform_optin);