1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright (C) 2014 Intel Corp.
4 * Author: Jiang Liu <jiang.liu@linux.intel.com>
6 * This file is licensed under GPLv2.
8 * This file contains common code to support Message Signaled Interrupts for
9 * PCI compatible and non PCI compatible devices.
11 #include <linux/types.h>
12 #include <linux/device.h>
13 #include <linux/irq.h>
14 #include <linux/irqdomain.h>
15 #include <linux/msi.h>
16 #include <linux/slab.h>
17 #include <linux/sysfs.h>
18 #include <linux/pci.h>
20 #include "internals.h"
23 * alloc_msi_entry - Allocate an initialized msi_desc
24 * @dev: Pointer to the device for which this is allocated
25 * @nvec: The number of vectors used in this entry
26 * @affinity: Optional pointer to an affinity mask array size of @nvec
28 * If @affinity is not %NULL then an affinity array[@nvec] is allocated
29 * and the affinity masks and flags from @affinity are copied.
31 * Return: pointer to allocated &msi_desc on success or %NULL on failure
/*
 * alloc_msi_entry() - allocate a zeroed msi_desc with kzalloc(), initialise
 * its list head and record @nvec in nvec_used. The caller's @affinity array
 * (nvec elements) is duplicated into desc->affinity with kmemdup().
 * NOTE(review): this listing skips source lines (embedded numbering jumps
 * 38 -> 42, 44 -> 46 and 48 -> 57), so the allocation-failure handling, any
 * guard around the affinity copy and the return statements are not visible
 * here - confirm against the full file.
 */
33 struct msi_desc *alloc_msi_entry(struct device *dev, int nvec,
34 const struct irq_affinity_desc *affinity)
36 struct msi_desc *desc;
38 desc = kzalloc(sizeof(*desc), GFP_KERNEL);
42 INIT_LIST_HEAD(&desc->list);
44 desc->nvec_used = nvec;
/* Private copy of the affinity descriptors: nvec entries. */
46 desc->affinity = kmemdup(affinity,
47 nvec * sizeof(*desc->affinity), GFP_KERNEL);
/* kmemdup() returned NULL; the handling itself is on lines not shown. */
48 if (!desc->affinity) {
/*
 * free_msi_entry() - release an msi_desc. The visible step frees the
 * affinity array attached to @entry.
 * NOTE(review): freeing of @entry itself is presumably on a line missing
 * from this listing - confirm.
 */
57 void free_msi_entry(struct msi_desc *entry)
59 kfree(entry->affinity);
/*
 * __get_cached_msi_msg() - takes an MSI descriptor and an msi_msg
 * out-pointer.
 * NOTE(review): the body is not present in this listing; presumably it
 * copies the descriptor's cached message into @msg - confirm.
 */
63 void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
/*
 * get_cached_msi_msg() - look up the MSI descriptor of Linux interrupt @irq
 * with irq_get_msi_desc() and pass it, together with @msg, to
 * __get_cached_msi_msg(). Exported to GPL modules.
 * NOTE(review): no check of the lookup result is visible between the
 * lookup and its use - confirm callers only pass MSI interrupts.
 */
68 void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg)
70 struct msi_desc *entry = irq_get_msi_desc(irq);
72 __get_cached_msi_msg(entry, msg);
74 EXPORT_SYMBOL_GPL(get_cached_msi_msg);
/*
 * msi_device_data_release() - release callback handed to devres_alloc()
 * for the per-device MSI data. @res is the msi_device_data being released.
 * Warns once if its descriptor list is not empty at that point.
 * NOTE(review): any further teardown is on lines not shown here.
 */
76 static void msi_device_data_release(struct device *dev, void *res)
78 struct msi_device_data *md = res;
80 WARN_ON_ONCE(!list_empty(&md->list));
85 * msi_setup_device_data - Setup MSI device data
86 * @dev: Device for which MSI device data should be set up
88 * Return: 0 on success, appropriate error code otherwise
90 * This can be called more than once for @dev. If the MSI device data is
91 * already allocated the call succeeds. The allocated memory is
92 * automatically released when the device is destroyed.
/*
 * msi_setup_device_data() - allocate the per-device MSI data as a devres
 * resource (released through msi_device_data_release()) and initialise its
 * list head.
 * NOTE(review): the already-allocated check, the allocation-failure check,
 * the attachment to @dev and the return statements are on lines missing
 * from this listing.
 */
94 int msi_setup_device_data(struct device *dev)
96 struct msi_device_data *md;
101 md = devres_alloc(msi_device_data_release, sizeof(*md), GFP_KERNEL);
105 INIT_LIST_HEAD(&md->list);
112 * msi_get_virq - Return Linux interrupt number of a MSI interrupt
113 * @dev: Device to operate on
114 * @index: MSI interrupt index to look for (0-based)
116 * Return: The Linux interrupt number on success (> 0), 0 if not found
/*
 * msi_get_virq() - translate a 0-based MSI @index of @dev into a Linux
 * interrupt number by walking the device's MSI descriptors. Exported to
 * GPL modules.
 * NOTE(review): several lines are absent from this listing (the
 * declaration of pcimsi, the branch choosing between the two lookups and
 * the not-found return), so confirm the exact control flow in the full file.
 */
118 unsigned int msi_get_virq(struct device *dev, unsigned int index)
120 struct msi_desc *desc;
/* True only for a PCI device whose msi_enabled flag is set. */
126 pcimsi = dev_is_pci(dev) ? to_pci_dev(dev)->msi_enabled : false;
128 for_each_msi_entry(desc, dev) {
129 /* PCI-MSI has only one descriptor for multiple interrupts. */
/* The index must lie inside the descriptor's block of nvec_used vectors. */
131 if (desc->irq && index < desc->nvec_used)
132 return desc->irq + index;
137 * PCI-MSIX and platform MSI use a descriptor per
/* One-descriptor-per-interrupt case: match on the descriptor's msi_index. */
140 if (desc->msi_index == index)
145 EXPORT_SYMBOL_GPL(msi_get_virq);
/*
 * msi_mode_show() - sysfs show callback for a per-interrupt attribute.
 * Emits "msix\n" when @dev is a PCI device with msix_enabled set and
 * "msi\n" otherwise; returns the sysfs_emit() result.
 * NOTE(review): the third parameter (the output buffer, used as buf below)
 * is declared on a line missing from this listing.
 */
148 static ssize_t msi_mode_show(struct device *dev, struct device_attribute *attr,
151 /* MSI vs. MSIX is per device not per interrupt */
152 bool is_msix = dev_is_pci(dev) ? to_pci_dev(dev)->msix_enabled : false;
154 return sysfs_emit(buf, "%s\n", is_msix ? "msix" : "msi");
158 * msi_populate_sysfs - Populate msi_irqs sysfs entries for devices
159 * @dev: The device (PCI, platform etc) which will get sysfs entries
/*
 * msi_populate_sysfs() - build and register the "msi_irqs" attribute group
 * for @dev.
 *
 * Visible steps: sum nvec_used over all MSI descriptors, allocate a zeroed
 * attribute pointer array with num_msi + 1 slots, create one read-only
 * (0444) device_attribute per vector whose show callback is
 * msi_mode_show(), wrap the array in a group named "msi_irqs", place that
 * group in a zeroed two-slot group array and register it with
 * sysfs_create_groups().
 *
 * Return: the group array on success; ERR_PTR(-ENOMEM) on the visible
 * allocation-failure path.
 * NOTE(review): the counters (num_msi, count, i, ret), the error labels and
 * several result checks are on lines missing from this listing, so the
 * unwind sequence at the end is only partly visible.
 */
161 static const struct attribute_group **msi_populate_sysfs(struct device *dev)
163 const struct attribute_group **msi_irq_groups;
164 struct attribute **msi_attrs, *msi_attr;
165 struct device_attribute *msi_dev_attr;
166 struct attribute_group *msi_irq_group;
167 struct msi_desc *entry;
173 /* Determine how many msi entries we have */
174 for_each_msi_entry(entry, dev)
175 num_msi += entry->nvec_used;
179 /* Dynamically create the MSI attributes for the device */
/* One extra zeroed slot is allocated beyond the num_msi attributes. */
180 msi_attrs = kcalloc(num_msi + 1, sizeof(void *), GFP_KERNEL);
182 return ERR_PTR(-ENOMEM);
184 for_each_msi_entry(entry, dev) {
185 for (i = 0; i < entry->nvec_used; i++) {
186 msi_dev_attr = kzalloc(sizeof(*msi_dev_attr), GFP_KERNEL);
/* Stored before the name is allocated (the array was zero-initialised). */
189 msi_attrs[count] = &msi_dev_attr->attr;
191 sysfs_attr_init(&msi_dev_attr->attr);
/* Decimal file name; the formatted argument is on a line not shown. */
192 msi_dev_attr->attr.name = kasprintf(GFP_KERNEL, "%d",
194 if (!msi_dev_attr->attr.name)
196 msi_dev_attr->attr.mode = 0444;
197 msi_dev_attr->show = msi_mode_show;
202 msi_irq_group = kzalloc(sizeof(*msi_irq_group), GFP_KERNEL);
205 msi_irq_group->name = "msi_irqs";
206 msi_irq_group->attrs = msi_attrs;
/* Two zeroed slots; only slot 0 is filled below. */
208 msi_irq_groups = kcalloc(2, sizeof(void *), GFP_KERNEL);
210 goto error_irq_group;
211 msi_irq_groups[0] = msi_irq_group;
213 ret = sysfs_create_groups(&dev->kobj, msi_irq_groups);
215 goto error_irq_groups;
217 return msi_irq_groups;
/* Error unwind: frees the group array, the group, then per-attribute data. */
220 kfree(msi_irq_groups);
222 kfree(msi_irq_group);
225 msi_attr = msi_attrs[count];
/* Recover the containing device_attribute from the embedded attribute. */
227 msi_dev_attr = container_of(msi_attr, struct device_attribute, attr);
228 kfree(msi_attr->name);
231 msi_attr = msi_attrs[count];
238 * msi_device_populate_sysfs - Populate msi_irqs sysfs entries for a device
239 * @dev: The device (PCI, platform etc) which will get sysfs entries
/*
 * msi_device_populate_sysfs() - create the sysfs groups for @dev via
 * msi_populate_sysfs() and remember the returned group array in
 * dev->msi.data->attrs. On the visible error path the error code encoded in
 * the returned pointer is propagated with PTR_ERR().
 * NOTE(review): the error test and the success return are on lines missing
 * from this listing.
 */
241 int msi_device_populate_sysfs(struct device *dev)
243 const struct attribute_group **group = msi_populate_sysfs(dev);
246 return PTR_ERR(group);
247 dev->msi.data->attrs = group;
252 * msi_device_destroy_sysfs - Destroy msi_irqs sysfs entries for a device
253 * @dev: The device (PCI, platform etc) for which to remove
/*
 * msi_device_destroy_sysfs() - tear down the sysfs groups recorded in
 * dev->msi.data->attrs.
 * Visible steps: take the stored group array, clear the stored pointer,
 * remove the groups from sysfs, walk the attribute array of group 0 until
 * an empty slot and free each attribute's name, then free the group and
 * the group array.
 * NOTE(review): the count variable, any guard for an empty group array and
 * the freeing of the attribute objects and array are on lines missing from
 * this listing.
 */
256 void msi_device_destroy_sysfs(struct device *dev)
258 const struct attribute_group **msi_irq_groups = dev->msi.data->attrs;
259 struct device_attribute *dev_attr;
260 struct attribute **msi_attrs;
/* Drop the reference first so the device no longer points at the groups. */
263 dev->msi.data->attrs = NULL;
267 sysfs_remove_groups(&dev->kobj, msi_irq_groups);
268 msi_attrs = msi_irq_groups[0]->attrs;
/* The loop stops at the first empty slot of the attribute array. */
269 while (msi_attrs[count]) {
270 dev_attr = container_of(msi_attrs[count], struct device_attribute, attr);
271 kfree(dev_attr->attr.name);
276 kfree(msi_irq_groups[0]);
277 kfree(msi_irq_groups);
281 #ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
/*
 * irq_chip_write_msi_msg() - forward an MSI message to the
 * irq_write_msi_msg callback of the chip attached to @data.
 * NOTE(review): the second parameter (msg) is declared on a line missing
 * from this listing.
 */
282 static inline void irq_chip_write_msi_msg(struct irq_data *data,
285 data->chip->irq_write_msi_msg(data, msg);
/*
 * msi_check_level() - sanity check on a two-element message array.
 * Emits a WARN when the second message (msg[1]) has any non-zero
 * address/data field while the domain does not have both
 * MSI_FLAG_LEVEL_CAPABLE in info->flags and IRQCHIP_SUPPORTS_LEVEL_MSI in
 * the chip flags. @info is taken from the domain's host_data.
 */
288 static void msi_check_level(struct irq_domain *domain, struct msi_msg *msg)
290 struct msi_domain_info *info = domain->host_data;
293 * If the MSI provider has messed with the second message and
294 * not advertized that it is level-capable, signal the breakage.
296 WARN_ON(!((info->flags & MSI_FLAG_LEVEL_CAPABLE) &&
297 (info->chip->flags & IRQCHIP_SUPPORTS_LEVEL_MSI)) &&
298 (msg[1].address_lo || msg[1].address_hi || msg[1].data));
302 * msi_domain_set_affinity - Generic affinity setter function for MSI domains
303 * @irq_data: The irq data associated to the interrupt
304 * @mask: The affinity mask to set
305 * @force: Flag to enforce setting (disable online checks)
307 * Intended to be used by MSI interrupt controllers which are
308 * implemented with hierarchical domains.
310 * Return: IRQ_SET_MASK_* result code
/*
 * msi_domain_set_affinity() - set affinity through the parent irq_data's
 * chip, then, unless that failed (negative) or returned
 * IRQ_SET_MASK_OK_DONE, compose a fresh MSI message, run the level sanity
 * check and write the message to the device.
 * NOTE(review): the declaration of ret and the return statement are on
 * lines missing from this listing.
 */
312 int msi_domain_set_affinity(struct irq_data *irq_data,
313 const struct cpumask *mask, bool force)
315 struct irq_data *parent = irq_data->parent_data;
/* Two messages; the second starts zeroed and is inspected by msi_check_level(). */
316 struct msi_msg msg[2] = { [1] = { }, };
319 ret = parent->chip->irq_set_affinity(parent, mask, force);
320 if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE) {
/* A failure to compose the message is treated as fatal (BUG). */
321 BUG_ON(irq_chip_compose_msi_msg(irq_data, msg));
322 msi_check_level(irq_data->domain, msg);
323 irq_chip_write_msi_msg(irq_data, msg);
/*
 * msi_domain_activate() - irq_domain_ops.activate callback (wired up in
 * the msi_domain_ops table). Composes the MSI message for @irq_data, runs
 * the level sanity check and writes the message via the chip.
 * @early is not used on the visible lines.
 * NOTE(review): the return statement is on a line missing from this listing.
 */
329 static int msi_domain_activate(struct irq_domain *domain,
330 struct irq_data *irq_data, bool early)
332 struct msi_msg msg[2] = { [1] = { }, };
/* A failure to compose the message is treated as fatal (BUG). */
334 BUG_ON(irq_chip_compose_msi_msg(irq_data, msg));
335 msi_check_level(irq_data->domain, msg);
336 irq_chip_write_msi_msg(irq_data, msg);
/*
 * msi_domain_deactivate() - irq_domain_ops.deactivate callback (wired up
 * in the msi_domain_ops table). Writes an all-zero two-element message
 * array through the chip's write callback.
 */
340 static void msi_domain_deactivate(struct irq_domain *domain,
341 struct irq_data *irq_data)
343 struct msi_msg msg[2];
345 memset(msg, 0, sizeof(msg));
346 irq_chip_write_msi_msg(irq_data, msg);
/*
 * msi_domain_alloc() - irq_domain_ops.alloc callback (wired up in the
 * msi_domain_ops table).
 * Visible steps: obtain the hardware irq number from ops->get_hwirq(),
 * test for an existing mapping with irq_find_mapping(), allocate from the
 * parent domain when one exists, then call ops->msi_init() for each of the
 * @nr_irqs interrupts (virq + i, hwirq + i).
 * NOTE(review): the declarations of i and ret, the result checks and the
 * return statements are on lines missing from this listing.
 */
349 static int msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
350 unsigned int nr_irqs, void *arg)
352 struct msi_domain_info *info = domain->host_data;
353 struct msi_domain_ops *ops = info->ops;
354 irq_hw_number_t hwirq = ops->get_hwirq(info, arg);
/* A positive result means hwirq is already mapped in this domain. */
357 if (irq_find_mapping(domain, hwirq) > 0)
360 if (domain->parent) {
361 ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
366 for (i = 0; i < nr_irqs; i++) {
367 ret = ops->msi_init(domain, info, virq + i, hwirq + i, arg);
/*
 * NOTE(review): this unwind loop stops at i > 0, so ops->msi_free() is
 * never invoked for virq + 0 although the init loop above starts at
 * index 0. This looks like an off-by-one (i >= 0 expected) - confirm and
 * fix in the full source.
 */
370 for (i--; i > 0; i--)
371 ops->msi_free(domain, info, virq + i);
373 irq_domain_free_irqs_top(domain, virq, nr_irqs);
/*
 * msi_domain_free() - irq_domain_ops.free callback (wired up in the
 * msi_domain_ops table). When the domain's ops provide msi_free it is
 * called for each of the @nr_irqs interrupts starting at @virq, then the
 * range is released with irq_domain_free_irqs_top().
 * NOTE(review): the declaration of i is on a line missing from this listing.
 */
381 static void msi_domain_free(struct irq_domain *domain, unsigned int virq,
382 unsigned int nr_irqs)
384 struct msi_domain_info *info = domain->host_data;
/* msi_free is optional. */
387 if (info->ops->msi_free) {
388 for (i = 0; i < nr_irqs; i++)
389 info->ops->msi_free(domain, info, virq + i);
391 irq_domain_free_irqs_top(domain, virq, nr_irqs);
/*
 * irq_domain_ops table for MSI domains; passed to
 * irq_domain_create_hierarchy() in msi_create_irq_domain().
 * Maps alloc/free/activate/deactivate to the msi_domain_* handlers.
 */
394 static const struct irq_domain_ops msi_domain_ops = {
395 .alloc = msi_domain_alloc,
396 .free = msi_domain_free,
397 .activate = msi_domain_activate,
398 .deactivate = msi_domain_deactivate,
/*
 * msi_domain_ops_get_hwirq() - default get_hwirq op (installed through
 * msi_domain_ops_default).
 * NOTE(review): the body is not present in this listing; presumably it
 * returns a hardware irq number carried in @arg - confirm.
 */
401 static irq_hw_number_t msi_domain_ops_get_hwirq(struct msi_domain_info *info,
402 msi_alloc_info_t *arg)
/*
 * msi_domain_ops_prepare() - default msi_prepare op (installed through
 * msi_domain_ops_default). Zeroes the allocation info @arg; @domain, @dev
 * and @nvec are not used on the visible lines.
 * NOTE(review): the return statement is on a line missing from this listing.
 */
407 static int msi_domain_ops_prepare(struct irq_domain *domain, struct device *dev,
408 int nvec, msi_alloc_info_t *arg)
410 memset(arg, 0, sizeof(*arg));
/*
 * msi_domain_ops_set_desc() - default set_desc op (installed through
 * msi_domain_ops_default).
 * NOTE(review): the body is not present in this listing; presumably it
 * records @desc in @arg - confirm.
 */
414 static void msi_domain_ops_set_desc(msi_alloc_info_t *arg,
415 struct msi_desc *desc)
/*
 * msi_domain_ops_init() - default msi_init op (installed through
 * msi_domain_ops_default). Binds @hwirq and the domain's chip to @virq;
 * when the info supplies both a handler and a handler name, installs that
 * handler and, if present, its handler data.
 * NOTE(review): the final argument of irq_domain_set_hwirq_and_chip() and
 * the return statement are on lines missing from this listing.
 */
420 static int msi_domain_ops_init(struct irq_domain *domain,
421 struct msi_domain_info *info,
422 unsigned int virq, irq_hw_number_t hwirq,
423 msi_alloc_info_t *arg)
425 irq_domain_set_hwirq_and_chip(domain, virq, hwirq, info->chip,
/* The handler is only installed when both handler and name are provided. */
427 if (info->handler && info->handler_name) {
428 __irq_set_handler(virq, info->handler, 0, info->handler_name);
429 if (info->handler_data)
430 irq_set_handler_data(virq, info->handler_data);
/*
 * msi_domain_ops_check() - default msi_check op (installed through
 * msi_domain_ops_default).
 * NOTE(review): the last parameter and the whole body are on lines missing
 * from this listing; the return value cannot be stated from here.
 */
435 static int msi_domain_ops_check(struct irq_domain *domain,
436 struct msi_domain_info *info,
/*
 * Default msi_domain_ops. msi_domain_update_dom_ops() installs this table
 * or copies individual callbacks from it into a provider's ops where the
 * provider left them NULL.
 */
442 static struct msi_domain_ops msi_domain_ops_default = {
443 .get_hwirq = msi_domain_ops_get_hwirq,
444 .msi_init = msi_domain_ops_init,
445 .msi_check = msi_domain_ops_check,
446 .msi_prepare = msi_domain_ops_prepare,
447 .set_desc = msi_domain_ops_set_desc,
448 .domain_alloc_irqs = __msi_domain_alloc_irqs,
449 .domain_free_irqs = __msi_domain_free_irqs,
/*
 * msi_domain_update_dom_ops() - fill in missing domain ops from
 * msi_domain_ops_default.
 * Visible behaviour: info->ops can be pointed at the default table as a
 * whole; otherwise domain_alloc_irqs and domain_free_irqs are defaulted
 * whenever NULL, and the remaining callbacks (get_hwirq, msi_init,
 * msi_check, msi_prepare, set_desc) are defaulted when NULL after the
 * MSI_FLAG_USE_DEF_DOM_OPS test.
 * NOTE(review): the condition guarding the whole-table assignment and the
 * statement following the flag test (presumably an early return) are on
 * lines missing from this listing - confirm.
 */
452 static void msi_domain_update_dom_ops(struct msi_domain_info *info)
454 struct msi_domain_ops *ops = info->ops;
457 info->ops = &msi_domain_ops_default;
/* The allocation and free entry points are defaulted regardless of the flag. */
461 if (ops->domain_alloc_irqs == NULL)
462 ops->domain_alloc_irqs = msi_domain_ops_default.domain_alloc_irqs;
463 if (ops->domain_free_irqs == NULL)
464 ops->domain_free_irqs = msi_domain_ops_default.domain_free_irqs;
466 if (!(info->flags & MSI_FLAG_USE_DEF_DOM_OPS))
/* Per-callback defaults for anything the provider left unset. */
469 if (ops->get_hwirq == NULL)
470 ops->get_hwirq = msi_domain_ops_default.get_hwirq;
471 if (ops->msi_init == NULL)
472 ops->msi_init = msi_domain_ops_default.msi_init;
473 if (ops->msi_check == NULL)
474 ops->msi_check = msi_domain_ops_default.msi_check;
475 if (ops->msi_prepare == NULL)
476 ops->msi_prepare = msi_domain_ops_default.msi_prepare;
477 if (ops->set_desc == NULL)
478 ops->set_desc = msi_domain_ops_default.set_desc;
/*
 * msi_domain_update_chip_ops() - validate and complete the irq_chip in
 * @info. BUGs when the chip or its irq_mask/irq_unmask callbacks are
 * missing, and installs msi_domain_set_affinity() when the chip has no
 * irq_set_affinity callback.
 */
481 static void msi_domain_update_chip_ops(struct msi_domain_info *info)
483 struct irq_chip *chip = info->chip;
485 BUG_ON(!chip || !chip->irq_mask || !chip->irq_unmask);
486 if (!chip->irq_set_affinity)
487 chip->irq_set_affinity = msi_domain_set_affinity;
491 * msi_create_irq_domain - Create an MSI interrupt domain
492 * @fwnode: Optional fwnode of the interrupt controller
493 * @info: MSI domain info
494 * @parent: Parent irq domain
496 * Return: pointer to the created &struct irq_domain or %NULL on failure
/*
 * msi_create_irq_domain() - create an MSI irq domain below @parent.
 * Completes the domain ops in @info, completes the chip ops when
 * MSI_FLAG_USE_DEF_CHIP_OPS is set, then creates a hierarchical domain
 * flagged IRQ_DOMAIN_FLAG_MSI that uses the msi_domain_ops table and is
 * given @info as its last argument. A domain created without a name
 * inherits the chip's name when a chip is present.
 * NOTE(review): the return statement is on a line missing from this listing.
 */
498 struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode,
499 struct msi_domain_info *info,
500 struct irq_domain *parent)
502 struct irq_domain *domain;
504 msi_domain_update_dom_ops(info);
505 if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
506 msi_domain_update_chip_ops(info);
508 domain = irq_domain_create_hierarchy(parent, IRQ_DOMAIN_FLAG_MSI, 0,
509 fwnode, &msi_domain_ops, info);
/* Fall back to the chip name only when the domain is still unnamed. */
511 if (domain && !domain->name && info->chip)
512 domain->name = info->chip->name;
/*
 * msi_domain_prepare_irqs() - run the domain's msi_check op for @dev and
 * its msi_prepare op for @nvec vectors with allocation info @arg.
 * NOTE(review): the declaration of ret, the test between the two calls and
 * the return statement are on lines missing from this listing.
 */
517 int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev,
518 int nvec, msi_alloc_info_t *arg)
520 struct msi_domain_info *info = domain->host_data;
521 struct msi_domain_ops *ops = info->ops;
524 ret = ops->msi_check(domain, info, dev);
526 ret = ops->msi_prepare(domain, dev, nvec, arg);
/*
 * msi_domain_populate_irqs() - allocate hierarchy resources for descriptors
 * of @dev that already carry an interrupt number.
 * Visible steps: walk the MSI descriptors; warn on a descriptor without an
 * irq or with nvec_used != 1; apply a range test against
 * [@virq, @virq + @nvec); call ops->set_desc(), allocate one interrupt in
 * the hierarchy and attach the descriptor to it. A second walk ("Mop up
 * the damage") frees interrupts with irq_domain_free_irqs_common().
 * NOTE(review): the statements following the range tests (presumably
 * continue), the result checks and the return statements are on lines
 * missing from this listing.
 */
531 int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
532 int virq, int nvec, msi_alloc_info_t *arg)
534 struct msi_domain_info *info = domain->host_data;
535 struct msi_domain_ops *ops = info->ops;
536 struct msi_desc *desc;
539 for_each_msi_entry(desc, dev) {
540 /* Don't even try the multi-MSI brain damage. */
541 if (WARN_ON(!desc->irq || desc->nvec_used != 1)) {
/* True when desc->irq lies outside [virq, virq + nvec). */
546 if (!(desc->irq >= virq && desc->irq < (virq + nvec)))
549 ops->set_desc(arg, desc);
550 /* Assumes the domain mutex is held! */
551 ret = irq_domain_alloc_irqs_hierarchy(domain, desc->irq, 1,
556 irq_set_msi_desc_off(desc->irq, 0, desc);
560 /* Mop up the damage */
561 for_each_msi_entry(desc, dev) {
/* Same out-of-range test as in the allocation loop. */
562 if (!(desc->irq >= virq && desc->irq < (virq + nvec)))
565 irq_domain_free_irqs_common(domain, desc->irq, 1);
573 * Carefully check whether the device can use reservation mode. If
574 * reservation mode is enabled then the early activation will assign a
575 * dummy vector to the device. If the PCI/MSI device does not support
576 * masking of the entry then this can result in spurious interrupts when
577 * the device driver is not absolutely careful. But even then a malfunction
578 * of the hardware could result in a spurious interrupt on the dummy vector
579 * and render the device unusable. If the entry can be masked then the core
580 * logic will prevent the spurious interrupt and reservation mode can be
581 * used. For now reservation mode is restricted to PCI/MSI.
/*
 * msi_check_reservation_mode() - decide whether reservation mode may be
 * used for @dev.
 * Visible checks: a switch on the domain bus token listing
 * DOMAIN_BUS_PCI_MSI and DOMAIN_BUS_VMD_MSI, a test of
 * MSI_FLAG_MUST_REACTIVATE in info->flags, and a test of
 * pci_msi_ignore_mask when CONFIG_PCI_MSI is enabled. The final answer is
 * true when the first MSI descriptor is MSI-X or has can_mask set.
 * NOTE(review): the last parameter (dev) and the statements taken by each
 * check are on lines missing from this listing.
 */
583 static bool msi_check_reservation_mode(struct irq_domain *domain,
584 struct msi_domain_info *info,
587 struct msi_desc *desc;
589 switch(domain->bus_token) {
590 case DOMAIN_BUS_PCI_MSI:
591 case DOMAIN_BUS_VMD_MSI:
597 if (!(info->flags & MSI_FLAG_MUST_REACTIVATE))
600 if (IS_ENABLED(CONFIG_PCI_MSI) && pci_msi_ignore_mask)
604 * Checking the first MSI descriptor is sufficient. MSIX supports
605 * masking and MSI does so when the can_mask attribute is set.
607 desc = first_msi_entry(dev);
608 return desc->pci.msi_attrib.is_msix || desc->pci.msi_attrib.can_mask;
/*
 * msi_handle_pci_fail() - choose the return value after a failed interrupt
 * allocation.
 * Visible behaviour: a switch on the domain bus token listing
 * DOMAIN_BUS_PCI_MSI and DOMAIN_BUS_VMD_MSI with a CONFIG_PCI_MSI test, a
 * test for multi-vector descriptors (nvec_used > 1), and a final return of
 * the number already allocated, or -ENOSPC when that number is zero.
 * NOTE(review): the last parameter (allocated) and the statements taken by
 * the switch cases and by the multi-MSI test are on lines missing from
 * this listing.
 */
611 static int msi_handle_pci_fail(struct irq_domain *domain, struct msi_desc *desc,
614 switch(domain->bus_token) {
615 case DOMAIN_BUS_PCI_MSI:
616 case DOMAIN_BUS_VMD_MSI:
617 if (IS_ENABLED(CONFIG_PCI_MSI))
624 /* Let a failed PCI multi MSI allocation retry */
625 if (desc->nvec_used > 1)
628 /* If there was a successful allocation let the caller know */
629 return allocated ? allocated : -ENOSPC;
/*
 * __msi_domain_alloc_irqs() - default domain_alloc_irqs implementation
 * (installed through msi_domain_ops_default).
 *
 * Visible phases:
 *  1. msi_domain_prepare_irqs() sets up the allocation info.
 *  2. For each MSI descriptor: ops->set_desc(), allocate nvec_used
 *     interrupts with __irq_domain_alloc_irqs(), consult
 *     msi_handle_pci_fail() on the failure path, then attach the
 *     descriptor to each interrupt and copy the device name for debugfs.
 *  3. Determine whether reservation mode is usable; activation is
 *     conditional on MSI_FLAG_ACTIVATE_EARLY.
 *  4. For each vector: clear can-reserve, apply the no-mask quirk when the
 *     domain requests it, and activate the interrupt.
 *  5. Clear the activated bit on each vector (see the in-code comment on
 *     reservation mode).
 *  On failure msi_domain_free_irqs() is called.
 *
 * NOTE(review): the last parameter, several declarations, the conditions
 * guarding individual steps, the labels and the return statements are on
 * lines missing from this listing; the phase boundaries above follow the
 * visible statements only.
 */
632 int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
635 struct msi_domain_info *info = domain->host_data;
636 struct msi_domain_ops *ops = info->ops;
637 struct irq_data *irq_data;
638 struct msi_desc *desc;
639 msi_alloc_info_t arg = { };
644 ret = msi_domain_prepare_irqs(domain, dev, nvec, &arg);
648 for_each_msi_entry(desc, dev) {
649 ops->set_desc(&arg, desc);
/* -1 is passed as the second argument; presumably "any free irq base" - confirm. */
651 virq = __irq_domain_alloc_irqs(domain, -1, desc->nvec_used,
652 dev_to_node(dev), &arg, false,
655 ret = msi_handle_pci_fail(domain, desc, allocated);
/* Attach the descriptor to every interrupt of the allocated block. */
659 for (i = 0; i < desc->nvec_used; i++) {
660 irq_set_msi_desc_off(virq, i, desc);
661 irq_debugfs_copy_devname(virq + i, dev);
666 can_reserve = msi_check_reservation_mode(domain, info, dev);
669 * This flag is set by the PCI layer as we need to activate
670 * the MSI entries before the PCI layer enables MSI in the
671 * card. Otherwise the card latches a random msi message.
673 if (!(info->flags & MSI_FLAG_ACTIVATE_EARLY))
676 for_each_msi_vector(desc, i, dev) {
/* True on the first vector of a descriptor. */
677 if (desc->irq == i) {
679 dev_dbg(dev, "irq [%d-%d] for MSI\n",
680 virq, virq + desc->nvec_used - 1);
683 irq_data = irq_domain_get_irq_data(domain, i);
685 irqd_clr_can_reserve(irq_data);
686 if (domain->flags & IRQ_DOMAIN_MSI_NOMASK_QUIRK)
687 irqd_set_msi_nomask_quirk(irq_data);
689 ret = irq_domain_activate_irq(irq_data, can_reserve);
696 * If these interrupts use reservation mode, clear the activated bit
697 * so request_irq() will assign the final vector.
700 for_each_msi_vector(desc, i, dev) {
701 irq_data = irq_domain_get_irq_data(domain, i);
702 irqd_clr_activated(irq_data);
/* Failure path: release whatever was allocated for this device. */
708 msi_domain_free_irqs(domain, dev);
713 * msi_domain_alloc_irqs - Allocate interrupts from a MSI interrupt domain
714 * @domain: The domain to allocate from
715 * @dev: Pointer to device struct of the device for which the interrupts
717 * @nvec: The number of interrupts to allocate
719 * Return: %0 on success or an error code.
/*
 * msi_domain_alloc_irqs() - allocate @nvec interrupts for @dev through the
 * domain's domain_alloc_irqs op, then populate the device's MSI sysfs
 * entries; the sysfs step is conditional on MSI_FLAG_DEV_SYSFS. The
 * interrupts are freed again with msi_domain_free_irqs() on the visible
 * sysfs-failure path.
 * NOTE(review): the last parameter (nvec), the declaration of ret, the
 * result checks and the return statements are on lines missing from this
 * listing.
 */
721 int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
724 struct msi_domain_info *info = domain->host_data;
725 struct msi_domain_ops *ops = info->ops;
728 ret = ops->domain_alloc_irqs(domain, dev, nvec);
732 if (!(info->flags & MSI_FLAG_DEV_SYSFS))
735 ret = msi_device_populate_sysfs(dev);
737 msi_domain_free_irqs(domain, dev);
/*
 * __msi_domain_free_irqs() - default domain_free_irqs implementation
 * (installed through msi_domain_ops_default).
 * First pass: deactivate every vector of @dev that is still activated.
 * Second pass: free each descriptor's block of nvec_used interrupts.
 * NOTE(review): the declaration of i, the guard described by the in-code
 * comment (descriptor without an irq) and any reset of desc->irq are on
 * lines missing from this listing.
 */
741 void __msi_domain_free_irqs(struct irq_domain *domain, struct device *dev)
743 struct irq_data *irq_data;
744 struct msi_desc *desc;
747 for_each_msi_vector(desc, i, dev) {
748 irq_data = irq_domain_get_irq_data(domain, i);
/* Only vectors still marked activated are deactivated. */
749 if (irqd_is_activated(irq_data))
750 irq_domain_deactivate_irq(irq_data);
753 for_each_msi_entry(desc, dev) {
755 * We might have failed to allocate an MSI early
756 * enough that there is no IRQ associated to this
757 * entry. If that's the case, don't do anything.
760 irq_domain_free_irqs(desc->irq, desc->nvec_used);
767 * msi_domain_free_irqs - Free interrupts from a MSI interrupt @domain associated to @dev
768 * @domain: The domain managing the interrupts
769 * @dev: Pointer to device struct of the device for which the interrupts
/*
 * msi_domain_free_irqs() - free the MSI interrupts of @dev in @domain.
 * Removes the device's MSI sysfs entries first when MSI_FLAG_DEV_SYSFS is
 * set, then delegates to the domain's domain_free_irqs op.
 */
772 void msi_domain_free_irqs(struct irq_domain *domain, struct device *dev)
774 struct msi_domain_info *info = domain->host_data;
775 struct msi_domain_ops *ops = info->ops;
/* The sysfs teardown runs before the interrupts themselves are released. */
777 if (info->flags & MSI_FLAG_DEV_SYSFS)
778 msi_device_destroy_sysfs(dev);
779 ops->domain_free_irqs(domain, dev);
783 * msi_get_domain_info - Get the MSI interrupt domain info for @domain
784 * @domain: The interrupt domain to retrieve data from
786 * Return: the pointer to the msi_domain_info stored in @domain->host_data.
/*
 * msi_get_domain_info() - return @domain->host_data cast to a
 * struct msi_domain_info pointer. No validation is performed on @domain.
 */
788 struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain)
790 return (struct msi_domain_info *)domain->host_data;
793 #endif /* CONFIG_GENERIC_MSI_IRQ_DOMAIN */