x86, dmar: move page fault handling code to dmar.c
[linux-2.6-microblaze.git] / drivers / pci / dmar.c
1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  *
22  * This file implements early detection/parsing of Remapping Devices
23  * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
24  * tables.
25  *
26  * These routines are used by both DMA-remapping and Interrupt-remapping
27  */
28
29 #include <linux/pci.h>
30 #include <linux/dmar.h>
31 #include <linux/iova.h>
32 #include <linux/intel-iommu.h>
33 #include <linux/timer.h>
34 #include <linux/irq.h>
35 #include <linux/interrupt.h>
36
37 #undef PREFIX
38 #define PREFIX "DMAR:"
39
40 /* No locks are needed as DMA remapping hardware unit
41  * list is constructed at boot time and hotplug of
42  * these units are not supported by the architecture.
43  */
44 LIST_HEAD(dmar_drhd_units);
45
46 static struct acpi_table_header * __initdata dmar_tbl;
47 static acpi_size dmar_tbl_size;
48
49 static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
50 {
51         /*
52          * add INCLUDE_ALL at the tail, so scan the list will find it at
53          * the very end.
54          */
55         if (drhd->include_all)
56                 list_add_tail(&drhd->list, &dmar_drhd_units);
57         else
58                 list_add(&drhd->list, &dmar_drhd_units);
59 }
60
61 static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
62                                            struct pci_dev **dev, u16 segment)
63 {
64         struct pci_bus *bus;
65         struct pci_dev *pdev = NULL;
66         struct acpi_dmar_pci_path *path;
67         int count;
68
69         bus = pci_find_bus(segment, scope->bus);
70         path = (struct acpi_dmar_pci_path *)(scope + 1);
71         count = (scope->length - sizeof(struct acpi_dmar_device_scope))
72                 / sizeof(struct acpi_dmar_pci_path);
73
74         while (count) {
75                 if (pdev)
76                         pci_dev_put(pdev);
77                 /*
78                  * Some BIOSes list non-exist devices in DMAR table, just
79                  * ignore it
80                  */
81                 if (!bus) {
82                         printk(KERN_WARNING
83                         PREFIX "Device scope bus [%d] not found\n",
84                         scope->bus);
85                         break;
86                 }
87                 pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
88                 if (!pdev) {
89                         printk(KERN_WARNING PREFIX
90                         "Device scope device [%04x:%02x:%02x.%02x] not found\n",
91                                 segment, bus->number, path->dev, path->fn);
92                         break;
93                 }
94                 path ++;
95                 count --;
96                 bus = pdev->subordinate;
97         }
98         if (!pdev) {
99                 printk(KERN_WARNING PREFIX
100                 "Device scope device [%04x:%02x:%02x.%02x] not found\n",
101                 segment, scope->bus, path->dev, path->fn);
102                 *dev = NULL;
103                 return 0;
104         }
105         if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT && \
106                         pdev->subordinate) || (scope->entry_type == \
107                         ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {
108                 pci_dev_put(pdev);
109                 printk(KERN_WARNING PREFIX
110                         "Device scope type does not match for %s\n",
111                          pci_name(pdev));
112                 return -EINVAL;
113         }
114         *dev = pdev;
115         return 0;
116 }
117
118 static int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
119                                        struct pci_dev ***devices, u16 segment)
120 {
121         struct acpi_dmar_device_scope *scope;
122         void * tmp = start;
123         int index;
124         int ret;
125
126         *cnt = 0;
127         while (start < end) {
128                 scope = start;
129                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
130                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
131                         (*cnt)++;
132                 else
133                         printk(KERN_WARNING PREFIX
134                                 "Unsupported device scope\n");
135                 start += scope->length;
136         }
137         if (*cnt == 0)
138                 return 0;
139
140         *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
141         if (!*devices)
142                 return -ENOMEM;
143
144         start = tmp;
145         index = 0;
146         while (start < end) {
147                 scope = start;
148                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
149                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
150                         ret = dmar_parse_one_dev_scope(scope,
151                                 &(*devices)[index], segment);
152                         if (ret) {
153                                 kfree(*devices);
154                                 return ret;
155                         }
156                         index ++;
157                 }
158                 start += scope->length;
159         }
160
161         return 0;
162 }
163
164 /**
165  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
166  * structure which uniquely represent one DMA remapping hardware unit
167  * present in the platform
168  */
169 static int __init
170 dmar_parse_one_drhd(struct acpi_dmar_header *header)
171 {
172         struct acpi_dmar_hardware_unit *drhd;
173         struct dmar_drhd_unit *dmaru;
174         int ret = 0;
175
176         dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
177         if (!dmaru)
178                 return -ENOMEM;
179
180         dmaru->hdr = header;
181         drhd = (struct acpi_dmar_hardware_unit *)header;
182         dmaru->reg_base_addr = drhd->address;
183         dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
184
185         ret = alloc_iommu(dmaru);
186         if (ret) {
187                 kfree(dmaru);
188                 return ret;
189         }
190         dmar_register_drhd_unit(dmaru);
191         return 0;
192 }
193
194 static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
195 {
196         struct acpi_dmar_hardware_unit *drhd;
197         int ret = 0;
198
199         drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
200
201         if (dmaru->include_all)
202                 return 0;
203
204         ret = dmar_parse_dev_scope((void *)(drhd + 1),
205                                 ((void *)drhd) + drhd->header.length,
206                                 &dmaru->devices_cnt, &dmaru->devices,
207                                 drhd->segment);
208         if (ret) {
209                 list_del(&dmaru->list);
210                 kfree(dmaru);
211         }
212         return ret;
213 }
214
215 #ifdef CONFIG_DMAR
216 LIST_HEAD(dmar_rmrr_units);
217
218 static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
219 {
220         list_add(&rmrr->list, &dmar_rmrr_units);
221 }
222
223
224 static int __init
225 dmar_parse_one_rmrr(struct acpi_dmar_header *header)
226 {
227         struct acpi_dmar_reserved_memory *rmrr;
228         struct dmar_rmrr_unit *rmrru;
229
230         rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
231         if (!rmrru)
232                 return -ENOMEM;
233
234         rmrru->hdr = header;
235         rmrr = (struct acpi_dmar_reserved_memory *)header;
236         rmrru->base_address = rmrr->base_address;
237         rmrru->end_address = rmrr->end_address;
238
239         dmar_register_rmrr_unit(rmrru);
240         return 0;
241 }
242
243 static int __init
244 rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
245 {
246         struct acpi_dmar_reserved_memory *rmrr;
247         int ret;
248
249         rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
250         ret = dmar_parse_dev_scope((void *)(rmrr + 1),
251                 ((void *)rmrr) + rmrr->header.length,
252                 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
253
254         if (ret || (rmrru->devices_cnt == 0)) {
255                 list_del(&rmrru->list);
256                 kfree(rmrru);
257         }
258         return ret;
259 }
260 #endif
261
262 static void __init
263 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
264 {
265         struct acpi_dmar_hardware_unit *drhd;
266         struct acpi_dmar_reserved_memory *rmrr;
267
268         switch (header->type) {
269         case ACPI_DMAR_TYPE_HARDWARE_UNIT:
270                 drhd = (struct acpi_dmar_hardware_unit *)header;
271                 printk (KERN_INFO PREFIX
272                         "DRHD (flags: 0x%08x)base: 0x%016Lx\n",
273                         drhd->flags, (unsigned long long)drhd->address);
274                 break;
275         case ACPI_DMAR_TYPE_RESERVED_MEMORY:
276                 rmrr = (struct acpi_dmar_reserved_memory *)header;
277
278                 printk (KERN_INFO PREFIX
279                         "RMRR base: 0x%016Lx end: 0x%016Lx\n",
280                         (unsigned long long)rmrr->base_address,
281                         (unsigned long long)rmrr->end_address);
282                 break;
283         }
284 }
285
286 /**
287  * dmar_table_detect - checks to see if the platform supports DMAR devices
288  */
289 static int __init dmar_table_detect(void)
290 {
291         acpi_status status = AE_OK;
292
293         /* if we could find DMAR table, then there are DMAR devices */
294         status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
295                                 (struct acpi_table_header **)&dmar_tbl,
296                                 &dmar_tbl_size);
297
298         if (ACPI_SUCCESS(status) && !dmar_tbl) {
299                 printk (KERN_WARNING PREFIX "Unable to map DMAR\n");
300                 status = AE_NOT_FOUND;
301         }
302
303         return (ACPI_SUCCESS(status) ? 1 : 0);
304 }
305
306 /**
307  * parse_dmar_table - parses the DMA reporting table
308  */
309 static int __init
310 parse_dmar_table(void)
311 {
312         struct acpi_table_dmar *dmar;
313         struct acpi_dmar_header *entry_header;
314         int ret = 0;
315
316         /*
317          * Do it again, earlier dmar_tbl mapping could be mapped with
318          * fixed map.
319          */
320         dmar_table_detect();
321
322         dmar = (struct acpi_table_dmar *)dmar_tbl;
323         if (!dmar)
324                 return -ENODEV;
325
326         if (dmar->width < PAGE_SHIFT - 1) {
327                 printk(KERN_WARNING PREFIX "Invalid DMAR haw\n");
328                 return -EINVAL;
329         }
330
331         printk (KERN_INFO PREFIX "Host address width %d\n",
332                 dmar->width + 1);
333
334         entry_header = (struct acpi_dmar_header *)(dmar + 1);
335         while (((unsigned long)entry_header) <
336                         (((unsigned long)dmar) + dmar_tbl->length)) {
337                 /* Avoid looping forever on bad ACPI tables */
338                 if (entry_header->length == 0) {
339                         printk(KERN_WARNING PREFIX
340                                 "Invalid 0-length structure\n");
341                         ret = -EINVAL;
342                         break;
343                 }
344
345                 dmar_table_print_dmar_entry(entry_header);
346
347                 switch (entry_header->type) {
348                 case ACPI_DMAR_TYPE_HARDWARE_UNIT:
349                         ret = dmar_parse_one_drhd(entry_header);
350                         break;
351                 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
352 #ifdef CONFIG_DMAR
353                         ret = dmar_parse_one_rmrr(entry_header);
354 #endif
355                         break;
356                 default:
357                         printk(KERN_WARNING PREFIX
358                                 "Unknown DMAR structure type\n");
359                         ret = 0; /* for forward compatibility */
360                         break;
361                 }
362                 if (ret)
363                         break;
364
365                 entry_header = ((void *)entry_header + entry_header->length);
366         }
367         return ret;
368 }
369
370 int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
371                           struct pci_dev *dev)
372 {
373         int index;
374
375         while (dev) {
376                 for (index = 0; index < cnt; index++)
377                         if (dev == devices[index])
378                                 return 1;
379
380                 /* Check our parent */
381                 dev = dev->bus->self;
382         }
383
384         return 0;
385 }
386
387 struct dmar_drhd_unit *
388 dmar_find_matched_drhd_unit(struct pci_dev *dev)
389 {
390         struct dmar_drhd_unit *dmaru = NULL;
391         struct acpi_dmar_hardware_unit *drhd;
392
393         list_for_each_entry(dmaru, &dmar_drhd_units, list) {
394                 drhd = container_of(dmaru->hdr,
395                                     struct acpi_dmar_hardware_unit,
396                                     header);
397
398                 if (dmaru->include_all &&
399                     drhd->segment == pci_domain_nr(dev->bus))
400                         return dmaru;
401
402                 if (dmar_pci_device_match(dmaru->devices,
403                                           dmaru->devices_cnt, dev))
404                         return dmaru;
405         }
406
407         return NULL;
408 }
409
410 int __init dmar_dev_scope_init(void)
411 {
412         struct dmar_drhd_unit *drhd, *drhd_n;
413         int ret = -ENODEV;
414
415         list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
416                 ret = dmar_parse_dev(drhd);
417                 if (ret)
418                         return ret;
419         }
420
421 #ifdef CONFIG_DMAR
422         {
423                 struct dmar_rmrr_unit *rmrr, *rmrr_n;
424                 list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
425                         ret = rmrr_parse_dev(rmrr);
426                         if (ret)
427                                 return ret;
428                 }
429         }
430 #endif
431
432         return ret;
433 }
434
435
436 int __init dmar_table_init(void)
437 {
438         static int dmar_table_initialized;
439         int ret;
440
441         if (dmar_table_initialized)
442                 return 0;
443
444         dmar_table_initialized = 1;
445
446         ret = parse_dmar_table();
447         if (ret) {
448                 if (ret != -ENODEV)
449                         printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
450                 return ret;
451         }
452
453         if (list_empty(&dmar_drhd_units)) {
454                 printk(KERN_INFO PREFIX "No DMAR devices found\n");
455                 return -ENODEV;
456         }
457
458 #ifdef CONFIG_DMAR
459         if (list_empty(&dmar_rmrr_units))
460                 printk(KERN_INFO PREFIX "No RMRR found\n");
461 #endif
462
463 #ifdef CONFIG_INTR_REMAP
464         parse_ioapics_under_ir();
465 #endif
466         return 0;
467 }
468
469 void __init detect_intel_iommu(void)
470 {
471         int ret;
472
473         ret = dmar_table_detect();
474
475         {
476 #ifdef CONFIG_INTR_REMAP
477                 struct acpi_table_dmar *dmar;
478                 /*
479                  * for now we will disable dma-remapping when interrupt
480                  * remapping is enabled.
481                  * When support for queued invalidation for IOTLB invalidation
482                  * is added, we will not need this any more.
483                  */
484                 dmar = (struct acpi_table_dmar *) dmar_tbl;
485                 if (ret && cpu_has_x2apic && dmar->flags & 0x1)
486                         printk(KERN_INFO
487                                "Queued invalidation will be enabled to support "
488                                "x2apic and Intr-remapping.\n");
489 #endif
490 #ifdef CONFIG_DMAR
491                 if (ret && !no_iommu && !iommu_detected && !swiotlb &&
492                     !dmar_disabled)
493                         iommu_detected = 1;
494 #endif
495         }
496         early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
497         dmar_tbl = NULL;
498 }
499
500
501 int alloc_iommu(struct dmar_drhd_unit *drhd)
502 {
503         struct intel_iommu *iommu;
504         int map_size;
505         u32 ver;
506         static int iommu_allocated = 0;
507         int agaw = 0;
508
509         iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
510         if (!iommu)
511                 return -ENOMEM;
512
513         iommu->seq_id = iommu_allocated++;
514
515         iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE);
516         if (!iommu->reg) {
517                 printk(KERN_ERR "IOMMU: can't map the region\n");
518                 goto error;
519         }
520         iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
521         iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
522
523 #ifdef CONFIG_DMAR
524         agaw = iommu_calculate_agaw(iommu);
525         if (agaw < 0) {
526                 printk(KERN_ERR
527                         "Cannot get a valid agaw for iommu (seq_id = %d)\n",
528                         iommu->seq_id);
529                 goto error;
530         }
531 #endif
532         iommu->agaw = agaw;
533
534         /* the registers might be more than one page */
535         map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
536                 cap_max_fault_reg_offset(iommu->cap));
537         map_size = VTD_PAGE_ALIGN(map_size);
538         if (map_size > VTD_PAGE_SIZE) {
539                 iounmap(iommu->reg);
540                 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
541                 if (!iommu->reg) {
542                         printk(KERN_ERR "IOMMU: can't map the region\n");
543                         goto error;
544                 }
545         }
546
547         ver = readl(iommu->reg + DMAR_VER_REG);
548         pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
549                 (unsigned long long)drhd->reg_base_addr,
550                 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
551                 (unsigned long long)iommu->cap,
552                 (unsigned long long)iommu->ecap);
553
554         spin_lock_init(&iommu->register_lock);
555
556         drhd->iommu = iommu;
557         return 0;
558 error:
559         kfree(iommu);
560         return -1;
561 }
562
563 void free_iommu(struct intel_iommu *iommu)
564 {
565         if (!iommu)
566                 return;
567
568 #ifdef CONFIG_DMAR
569         free_dmar_iommu(iommu);
570 #endif
571
572         if (iommu->reg)
573                 iounmap(iommu->reg);
574         kfree(iommu);
575 }
576
577 /*
578  * Reclaim all the submitted descriptors which have completed its work.
579  */
580 static inline void reclaim_free_desc(struct q_inval *qi)
581 {
582         while (qi->desc_status[qi->free_tail] == QI_DONE) {
583                 qi->desc_status[qi->free_tail] = QI_FREE;
584                 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
585                 qi->free_cnt++;
586         }
587 }
588
589 static int qi_check_fault(struct intel_iommu *iommu, int index)
590 {
591         u32 fault;
592         int head;
593         struct q_inval *qi = iommu->qi;
594         int wait_index = (index + 1) % QI_LENGTH;
595
596         fault = readl(iommu->reg + DMAR_FSTS_REG);
597
598         /*
599          * If IQE happens, the head points to the descriptor associated
600          * with the error. No new descriptors are fetched until the IQE
601          * is cleared.
602          */
603         if (fault & DMA_FSTS_IQE) {
604                 head = readl(iommu->reg + DMAR_IQH_REG);
605                 if ((head >> 4) == index) {
606                         memcpy(&qi->desc[index], &qi->desc[wait_index],
607                                         sizeof(struct qi_desc));
608                         __iommu_flush_cache(iommu, &qi->desc[index],
609                                         sizeof(struct qi_desc));
610                         writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
611                         return -EINVAL;
612                 }
613         }
614
615         return 0;
616 }
617
618 /*
619  * Submit the queued invalidation descriptor to the remapping
620  * hardware unit and wait for its completion.
621  */
622 int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
623 {
624         int rc = 0;
625         struct q_inval *qi = iommu->qi;
626         struct qi_desc *hw, wait_desc;
627         int wait_index, index;
628         unsigned long flags;
629
630         if (!qi)
631                 return 0;
632
633         hw = qi->desc;
634
635         spin_lock_irqsave(&qi->q_lock, flags);
636         while (qi->free_cnt < 3) {
637                 spin_unlock_irqrestore(&qi->q_lock, flags);
638                 cpu_relax();
639                 spin_lock_irqsave(&qi->q_lock, flags);
640         }
641
642         index = qi->free_head;
643         wait_index = (index + 1) % QI_LENGTH;
644
645         qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
646
647         hw[index] = *desc;
648
649         wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
650                         QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
651         wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
652
653         hw[wait_index] = wait_desc;
654
655         __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
656         __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
657
658         qi->free_head = (qi->free_head + 2) % QI_LENGTH;
659         qi->free_cnt -= 2;
660
661         /*
662          * update the HW tail register indicating the presence of
663          * new descriptors.
664          */
665         writel(qi->free_head << 4, iommu->reg + DMAR_IQT_REG);
666
667         while (qi->desc_status[wait_index] != QI_DONE) {
668                 /*
669                  * We will leave the interrupts disabled, to prevent interrupt
670                  * context to queue another cmd while a cmd is already submitted
671                  * and waiting for completion on this cpu. This is to avoid
672                  * a deadlock where the interrupt context can wait indefinitely
673                  * for free slots in the queue.
674                  */
675                 rc = qi_check_fault(iommu, index);
676                 if (rc)
677                         goto out;
678
679                 spin_unlock(&qi->q_lock);
680                 cpu_relax();
681                 spin_lock(&qi->q_lock);
682         }
683 out:
684         qi->desc_status[index] = qi->desc_status[wait_index] = QI_DONE;
685
686         reclaim_free_desc(qi);
687         spin_unlock_irqrestore(&qi->q_lock, flags);
688
689         return rc;
690 }
691
692 /*
693  * Flush the global interrupt entry cache.
694  */
695 void qi_global_iec(struct intel_iommu *iommu)
696 {
697         struct qi_desc desc;
698
699         desc.low = QI_IEC_TYPE;
700         desc.high = 0;
701
702         /* should never fail */
703         qi_submit_sync(&desc, iommu);
704 }
705
706 int qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
707                      u64 type, int non_present_entry_flush)
708 {
709         struct qi_desc desc;
710
711         if (non_present_entry_flush) {
712                 if (!cap_caching_mode(iommu->cap))
713                         return 1;
714                 else
715                         did = 0;
716         }
717
718         desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
719                         | QI_CC_GRAN(type) | QI_CC_TYPE;
720         desc.high = 0;
721
722         return qi_submit_sync(&desc, iommu);
723 }
724
725 int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
726                    unsigned int size_order, u64 type,
727                    int non_present_entry_flush)
728 {
729         u8 dw = 0, dr = 0;
730
731         struct qi_desc desc;
732         int ih = 0;
733
734         if (non_present_entry_flush) {
735                 if (!cap_caching_mode(iommu->cap))
736                         return 1;
737                 else
738                         did = 0;
739         }
740
741         if (cap_write_drain(iommu->cap))
742                 dw = 1;
743
744         if (cap_read_drain(iommu->cap))
745                 dr = 1;
746
747         desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
748                 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
749         desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
750                 | QI_IOTLB_AM(size_order);
751
752         return qi_submit_sync(&desc, iommu);
753 }
754
755 /*
756  * Enable Queued Invalidation interface. This is a must to support
757  * interrupt-remapping. Also used by DMA-remapping, which replaces
758  * register based IOTLB invalidation.
759  */
760 int dmar_enable_qi(struct intel_iommu *iommu)
761 {
762         u32 cmd, sts;
763         unsigned long flags;
764         struct q_inval *qi;
765
766         if (!ecap_qis(iommu->ecap))
767                 return -ENOENT;
768
769         /*
770          * queued invalidation is already setup and enabled.
771          */
772         if (iommu->qi)
773                 return 0;
774
775         iommu->qi = kmalloc(sizeof(*qi), GFP_KERNEL);
776         if (!iommu->qi)
777                 return -ENOMEM;
778
779         qi = iommu->qi;
780
781         qi->desc = (void *)(get_zeroed_page(GFP_KERNEL));
782         if (!qi->desc) {
783                 kfree(qi);
784                 iommu->qi = 0;
785                 return -ENOMEM;
786         }
787
788         qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_KERNEL);
789         if (!qi->desc_status) {
790                 free_page((unsigned long) qi->desc);
791                 kfree(qi);
792                 iommu->qi = 0;
793                 return -ENOMEM;
794         }
795
796         qi->free_head = qi->free_tail = 0;
797         qi->free_cnt = QI_LENGTH;
798
799         spin_lock_init(&qi->q_lock);
800
801         spin_lock_irqsave(&iommu->register_lock, flags);
802         /* write zero to the tail reg */
803         writel(0, iommu->reg + DMAR_IQT_REG);
804
805         dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
806
807         cmd = iommu->gcmd | DMA_GCMD_QIE;
808         iommu->gcmd |= DMA_GCMD_QIE;
809         writel(cmd, iommu->reg + DMAR_GCMD_REG);
810
811         /* Make sure hardware complete it */
812         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
813         spin_unlock_irqrestore(&iommu->register_lock, flags);
814
815         return 0;
816 }
817
818 /* iommu interrupt handling. Most stuff are MSI-like. */
819
820 static const char *fault_reason_strings[] =
821 {
822         "Software",
823         "Present bit in root entry is clear",
824         "Present bit in context entry is clear",
825         "Invalid context entry",
826         "Access beyond MGAW",
827         "PTE Write access is not set",
828         "PTE Read access is not set",
829         "Next page table ptr is invalid",
830         "Root table address invalid",
831         "Context table ptr is invalid",
832         "non-zero reserved fields in RTP",
833         "non-zero reserved fields in CTP",
834         "non-zero reserved fields in PTE",
835 };
836 #define MAX_FAULT_REASON_IDX    (ARRAY_SIZE(fault_reason_strings) - 1)
837
838 const char *dmar_get_fault_reason(u8 fault_reason)
839 {
840         if (fault_reason > MAX_FAULT_REASON_IDX)
841                 return "Unknown";
842         else
843                 return fault_reason_strings[fault_reason];
844 }
845
846 void dmar_msi_unmask(unsigned int irq)
847 {
848         struct intel_iommu *iommu = get_irq_data(irq);
849         unsigned long flag;
850
851         /* unmask it */
852         spin_lock_irqsave(&iommu->register_lock, flag);
853         writel(0, iommu->reg + DMAR_FECTL_REG);
854         /* Read a reg to force flush the post write */
855         readl(iommu->reg + DMAR_FECTL_REG);
856         spin_unlock_irqrestore(&iommu->register_lock, flag);
857 }
858
859 void dmar_msi_mask(unsigned int irq)
860 {
861         unsigned long flag;
862         struct intel_iommu *iommu = get_irq_data(irq);
863
864         /* mask it */
865         spin_lock_irqsave(&iommu->register_lock, flag);
866         writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
867         /* Read a reg to force flush the post write */
868         readl(iommu->reg + DMAR_FECTL_REG);
869         spin_unlock_irqrestore(&iommu->register_lock, flag);
870 }
871
872 void dmar_msi_write(int irq, struct msi_msg *msg)
873 {
874         struct intel_iommu *iommu = get_irq_data(irq);
875         unsigned long flag;
876
877         spin_lock_irqsave(&iommu->register_lock, flag);
878         writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
879         writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
880         writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
881         spin_unlock_irqrestore(&iommu->register_lock, flag);
882 }
883
884 void dmar_msi_read(int irq, struct msi_msg *msg)
885 {
886         struct intel_iommu *iommu = get_irq_data(irq);
887         unsigned long flag;
888
889         spin_lock_irqsave(&iommu->register_lock, flag);
890         msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
891         msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
892         msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
893         spin_unlock_irqrestore(&iommu->register_lock, flag);
894 }
895
896 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
897                 u8 fault_reason, u16 source_id, unsigned long long addr)
898 {
899         const char *reason;
900
901         reason = dmar_get_fault_reason(fault_reason);
902
903         printk(KERN_ERR
904                 "DMAR:[%s] Request device [%02x:%02x.%d] "
905                 "fault addr %llx \n"
906                 "DMAR:[fault reason %02d] %s\n",
907                 (type ? "DMA Read" : "DMA Write"),
908                 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
909                 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
910         return 0;
911 }
912
913 #define PRIMARY_FAULT_REG_LEN (16)
914 static irqreturn_t dmar_fault(int irq, void *dev_id)
915 {
916         struct intel_iommu *iommu = dev_id;
917         int reg, fault_index;
918         u32 fault_status;
919         unsigned long flag;
920
921         spin_lock_irqsave(&iommu->register_lock, flag);
922         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
923
924         /* TBD: ignore advanced fault log currently */
925         if (!(fault_status & DMA_FSTS_PPF))
926                 goto clear_overflow;
927
928         fault_index = dma_fsts_fault_record_index(fault_status);
929         reg = cap_fault_reg_offset(iommu->cap);
930         while (1) {
931                 u8 fault_reason;
932                 u16 source_id;
933                 u64 guest_addr;
934                 int type;
935                 u32 data;
936
937                 /* highest 32 bits */
938                 data = readl(iommu->reg + reg +
939                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
940                 if (!(data & DMA_FRCD_F))
941                         break;
942
943                 fault_reason = dma_frcd_fault_reason(data);
944                 type = dma_frcd_type(data);
945
946                 data = readl(iommu->reg + reg +
947                                 fault_index * PRIMARY_FAULT_REG_LEN + 8);
948                 source_id = dma_frcd_source_id(data);
949
950                 guest_addr = dmar_readq(iommu->reg + reg +
951                                 fault_index * PRIMARY_FAULT_REG_LEN);
952                 guest_addr = dma_frcd_page_addr(guest_addr);
953                 /* clear the fault */
954                 writel(DMA_FRCD_F, iommu->reg + reg +
955                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
956
957                 spin_unlock_irqrestore(&iommu->register_lock, flag);
958
959                 dmar_fault_do_one(iommu, type, fault_reason,
960                                 source_id, guest_addr);
961
962                 fault_index++;
963                 if (fault_index > cap_num_fault_regs(iommu->cap))
964                         fault_index = 0;
965                 spin_lock_irqsave(&iommu->register_lock, flag);
966         }
967 clear_overflow:
968         /* clear primary fault overflow */
969         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
970         if (fault_status & DMA_FSTS_PFO)
971                 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
972
973         spin_unlock_irqrestore(&iommu->register_lock, flag);
974         return IRQ_HANDLED;
975 }
976
977 int dmar_set_interrupt(struct intel_iommu *iommu)
978 {
979         int irq, ret;
980
981         irq = create_irq();
982         if (!irq) {
983                 printk(KERN_ERR "IOMMU: no free vectors\n");
984                 return -EINVAL;
985         }
986
987         set_irq_data(irq, iommu);
988         iommu->irq = irq;
989
990         ret = arch_setup_dmar_msi(irq);
991         if (ret) {
992                 set_irq_data(irq, NULL);
993                 iommu->irq = 0;
994                 destroy_irq(irq);
995                 return 0;
996         }
997
998         /* Force fault register is cleared */
999         dmar_fault(irq, iommu);
1000
1001         ret = request_irq(irq, dmar_fault, 0, iommu->name, iommu);
1002         if (ret)
1003                 printk(KERN_ERR "IOMMU: can't request irq\n");
1004         return ret;
1005 }