1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2019 Mellanox Technologies. */
4 #include <linux/interrupt.h>
5 #include <linux/notifier.h>
6 #include <linux/module.h>
7 #include <linux/mlx5/driver.h>
12 #ifdef CONFIG_RFS_ACCEL
13 #include <linux/cpu_rmap.h>
16 #define MLX5_SFS_PER_CTRL_IRQ 64
17 #define MLX5_IRQ_CTRL_SF_MAX 8
18 /* min num of vectors for SFs to be enabled */
19 #define MLX5_IRQ_VEC_COMP_BASE_SF 2
21 #define MLX5_EQ_SHARE_IRQ_MAX_COMP (8)
22 #define MLX5_EQ_SHARE_IRQ_MAX_CTRL (UINT_MAX)
23 #define MLX5_EQ_SHARE_IRQ_MIN_COMP (1)
24 #define MLX5_EQ_SHARE_IRQ_MIN_CTRL (4)
27 struct atomic_notifier_head nh;
29 char name[MLX5_MAX_IRQ_NAME];
30 struct mlx5_irq_pool *pool;
36 struct mlx5_irq_table {
37 struct mlx5_irq_pool *pf_pool;
38 struct mlx5_irq_pool *sf_ctrl_pool;
39 struct mlx5_irq_pool *sf_comp_pool;
43 * mlx5_get_default_msix_vec_count - Get the default number of MSI-X vectors
44 * to be assigned to each VF.
46 * @num_vfs: Number of enabled VFs
48 int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs)
50 int num_vf_msix, min_msix, max_msix;
52 num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
56 min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
57 max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);
59 /* Limit maximum number of MSI-X vectors so the default configuration
60 * has some available in the pool. This will allow the user to increase
61 * the number of vectors in a VF without having to first size-down other
64 return max(min(num_vf_msix / num_vfs, max_msix / 2), min_msix);
68 * mlx5_set_msix_vec_count - Set dynamically allocated MSI-X on the VF
70 * @function_id: Internal PCI VF function ID
71 * @msix_vec_count: Number of MSI-X vectors to set
73 int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id,
76 int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
77 int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
78 void *hca_cap = NULL, *query_cap = NULL, *cap;
79 int num_vf_msix, min_msix, max_msix;
82 num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
86 if (!MLX5_CAP_GEN(dev, vport_group_manager) || !mlx5_core_is_pf(dev))
89 min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
90 max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);
92 if (msix_vec_count < min_msix)
95 if (msix_vec_count > max_msix)
98 query_cap = kzalloc(query_sz, GFP_KERNEL);
99 hca_cap = kzalloc(set_sz, GFP_KERNEL);
100 if (!hca_cap || !query_cap) {
105 ret = mlx5_vport_get_other_func_cap(dev, function_id, query_cap);
109 cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability);
110 memcpy(cap, MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability),
111 MLX5_UN_SZ_BYTES(hca_cap_union));
112 MLX5_SET(cmd_hca_cap, cap, dynamic_msix_table_size, msix_vec_count);
114 MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP);
115 MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1);
116 MLX5_SET(set_hca_cap_in, hca_cap, function_id, function_id);
118 MLX5_SET(set_hca_cap_in, hca_cap, op_mod,
119 MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1);
120 ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap);
127 static void irq_release(struct mlx5_irq *irq)
129 struct mlx5_irq_pool *pool = irq->pool;
131 xa_erase(&pool->irqs, irq->index);
132 /* free_irq requires that affinity and rmap will be cleared
133 * before calling it. This is why there is asymmetry with set_rmap
134 * which should be called after alloc_irq but before request_irq.
136 irq_set_affinity_hint(irq->irqn, NULL);
137 free_cpumask_var(irq->mask);
138 free_irq(irq->irqn, &irq->nh);
142 int mlx5_irq_put(struct mlx5_irq *irq)
144 struct mlx5_irq_pool *pool = irq->pool;
147 mutex_lock(&pool->lock);
149 if (!irq->refcount) {
153 mutex_unlock(&pool->lock);
157 int mlx5_irq_read_locked(struct mlx5_irq *irq)
159 lockdep_assert_held(&irq->pool->lock);
160 return irq->refcount;
163 int mlx5_irq_get_locked(struct mlx5_irq *irq)
165 lockdep_assert_held(&irq->pool->lock);
166 if (WARN_ON_ONCE(!irq->refcount))
172 static int irq_get(struct mlx5_irq *irq)
176 mutex_lock(&irq->pool->lock);
177 err = mlx5_irq_get_locked(irq);
178 mutex_unlock(&irq->pool->lock);
182 static irqreturn_t irq_int_handler(int irq, void *nh)
184 atomic_notifier_call_chain(nh, 0, NULL);
188 static void irq_sf_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
190 snprintf(name, MLX5_MAX_IRQ_NAME, "%s%d", pool->name, vecidx);
193 static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
195 if (!pool->xa_num_irqs.max) {
196 /* in case we only have a single irq for the device */
197 snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_combined%d", vecidx);
201 if (vecidx == pool->xa_num_irqs.max) {
202 snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async%d", vecidx);
206 snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx);
209 struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
210 const struct cpumask *affinity)
212 struct mlx5_core_dev *dev = pool->dev;
213 char name[MLX5_MAX_IRQ_NAME];
214 struct mlx5_irq *irq;
217 irq = kzalloc(sizeof(*irq), GFP_KERNEL);
219 return ERR_PTR(-ENOMEM);
220 irq->irqn = pci_irq_vector(dev->pdev, i);
221 if (!mlx5_irq_pool_is_sf_pool(pool))
222 irq_set_name(pool, name, i);
224 irq_sf_set_name(pool, name, i);
225 ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
226 snprintf(irq->name, MLX5_MAX_IRQ_NAME,
227 "%s@pci:%s", name, pci_name(dev->pdev));
228 err = request_irq(irq->irqn, irq_int_handler, 0, irq->name,
231 mlx5_core_err(dev, "Failed to request irq. err = %d\n", err);
234 if (!zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) {
235 mlx5_core_warn(dev, "zalloc_cpumask_var failed\n");
240 cpumask_copy(irq->mask, affinity);
241 irq_set_affinity_hint(irq->irqn, irq->mask);
246 err = xa_err(xa_store(&pool->irqs, irq->index, irq, GFP_KERNEL));
248 mlx5_core_err(dev, "Failed to alloc xa entry for irq(%u). err = %d\n",
254 irq_set_affinity_hint(irq->irqn, NULL);
255 free_cpumask_var(irq->mask);
257 free_irq(irq->irqn, &irq->nh);
263 int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
269 /* Something very bad happens here, we are enabling EQ
270 * on a non-existing IRQ.
273 ret = atomic_notifier_chain_register(&irq->nh, nb);
279 int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
283 err = atomic_notifier_chain_unregister(&irq->nh, nb);
288 struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq)
293 int mlx5_irq_get_index(struct mlx5_irq *irq)
300 /* requesting an irq from a given pool according to given index */
301 static struct mlx5_irq *
302 irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx,
303 struct cpumask *affinity)
305 struct mlx5_irq *irq;
307 mutex_lock(&pool->lock);
308 irq = xa_load(&pool->irqs, vecidx);
310 mlx5_irq_get_locked(irq);
313 irq = mlx5_irq_alloc(pool, vecidx, affinity);
315 mutex_unlock(&pool->lock);
319 static struct mlx5_irq_pool *sf_ctrl_irq_pool_get(struct mlx5_irq_table *irq_table)
321 return irq_table->sf_ctrl_pool;
324 static struct mlx5_irq_pool *sf_irq_pool_get(struct mlx5_irq_table *irq_table)
326 return irq_table->sf_comp_pool;
329 struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev)
331 struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
332 struct mlx5_irq_pool *pool = NULL;
334 if (mlx5_core_is_sf(dev))
335 pool = sf_irq_pool_get(irq_table);
337 /* In some configs, there won't be a pool of SFs IRQs. Hence, returning
338 * the PF IRQs pool in case the SF pool doesn't exist.
340 return pool ? pool : irq_table->pf_pool;
343 static struct mlx5_irq_pool *ctrl_irq_pool_get(struct mlx5_core_dev *dev)
345 struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
346 struct mlx5_irq_pool *pool = NULL;
348 if (mlx5_core_is_sf(dev))
349 pool = sf_ctrl_irq_pool_get(irq_table);
351 /* In some configs, there won't be a pool of SFs IRQs. Hence, returning
352 * the PF IRQs pool in case the SF pool doesn't exist.
354 return pool ? pool : irq_table->pf_pool;
358 * mlx5_irqs_release - release one or more IRQs back to the system.
359 * @irqs: IRQs to be released.
360 * @nirqs: number of IRQs to be released.
362 static void mlx5_irqs_release(struct mlx5_irq **irqs, int nirqs)
366 for (i = 0; i < nirqs; i++) {
367 synchronize_irq(irqs[i]->irqn);
368 mlx5_irq_put(irqs[i]);
373 * mlx5_ctrl_irq_release - release a ctrl IRQ back to the system.
374 * @ctrl_irq: ctrl IRQ to be released.
376 void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq)
378 mlx5_irqs_release(&ctrl_irq, 1);
382 * mlx5_ctrl_irq_request - request a ctrl IRQ for mlx5 device.
383 * @dev: mlx5 device that is requesting the IRQ.
385 * This function returns a pointer to IRQ, or ERR_PTR in case of error.
387 struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev)
389 struct mlx5_irq_pool *pool = ctrl_irq_pool_get(dev);
390 cpumask_var_t req_mask;
391 struct mlx5_irq *irq;
393 if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL))
394 return ERR_PTR(-ENOMEM);
395 cpumask_copy(req_mask, cpu_online_mask);
396 if (!mlx5_irq_pool_is_sf_pool(pool)) {
397 /* In case we are allocating a control IRQ for PF/VF */
398 if (!pool->xa_num_irqs.max) {
399 cpumask_clear(req_mask);
400 /* In case we only have a single IRQ for PF/VF */
401 cpumask_set_cpu(cpumask_first(cpu_online_mask), req_mask);
403 /* Allocate the IRQ in the last index of the pool */
404 irq = irq_pool_request_vector(pool, pool->xa_num_irqs.max, req_mask);
406 irq = mlx5_irq_affinity_request(pool, req_mask);
409 free_cpumask_var(req_mask);
414 * mlx5_irq_request - request an IRQ for mlx5 PF/VF device.
415 * @dev: mlx5 device that is requesting the IRQ.
416 * @vecidx: vector index of the IRQ. This argument is ignored if affinity is
418 * @affinity: cpumask requested for this IRQ.
420 * This function returns a pointer to IRQ, or ERR_PTR in case of error.
422 struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
423 struct cpumask *affinity)
425 struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
426 struct mlx5_irq_pool *pool;
427 struct mlx5_irq *irq;
429 pool = irq_table->pf_pool;
430 irq = irq_pool_request_vector(pool, vecidx, affinity);
433 mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n",
434 irq->irqn, cpumask_pr_args(affinity),
435 irq->refcount / MLX5_EQ_REFS_PER_IRQ);
440 * mlx5_irqs_release_vectors - release one or more IRQs back to the system.
441 * @irqs: IRQs to be released.
442 * @nirqs: number of IRQs to be released.
444 void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs)
446 mlx5_irqs_release(irqs, nirqs);
450 * mlx5_irqs_request_vectors - request one or more IRQs for mlx5 device.
451 * @dev: mlx5 device that is requesting the IRQs.
452 * @cpus: CPUs array for binding the IRQs
453 * @nirqs: number of IRQs to request.
454 * @irqs: an output array of IRQs pointers.
456 * Each IRQ is bound to at most 1 CPU.
457 * This function requests @nirqs IRQs, starting from vector index 0.
459 * This function returns the number of IRQs requested (which might be smaller than
460 * @nirqs) if successful, or a negative error code in case of an error.
462 int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs,
463 struct mlx5_irq **irqs)
465 cpumask_var_t req_mask;
466 struct mlx5_irq *irq;
469 if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL))
471 for (i = 0; i < nirqs; i++) {
472 cpumask_set_cpu(cpus[i], req_mask);
473 irq = mlx5_irq_request(dev, i, req_mask);
476 cpumask_clear(req_mask);
480 free_cpumask_var(req_mask);
481 return i ? i : PTR_ERR(irq);
484 static struct mlx5_irq_pool *
485 irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name,
486 u32 min_threshold, u32 max_threshold)
488 struct mlx5_irq_pool *pool = kvzalloc(sizeof(*pool), GFP_KERNEL);
491 return ERR_PTR(-ENOMEM);
493 mutex_init(&pool->lock);
494 xa_init_flags(&pool->irqs, XA_FLAGS_ALLOC);
495 pool->xa_num_irqs.min = start;
496 pool->xa_num_irqs.max = start + size - 1;
498 snprintf(pool->name, MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS,
500 pool->min_threshold = min_threshold * MLX5_EQ_REFS_PER_IRQ;
501 pool->max_threshold = max_threshold * MLX5_EQ_REFS_PER_IRQ;
502 mlx5_core_dbg(dev, "pool->name = %s, pool->size = %d, pool->start = %d",
507 static void irq_pool_free(struct mlx5_irq_pool *pool)
509 struct mlx5_irq *irq;
512 /* There are cases in which we are destroying the irq_table before
513 * freeing all the IRQs, fast teardown for example. Hence, free the irqs
514 * which might not have been freed.
516 xa_for_each(&pool->irqs, index, irq)
518 xa_destroy(&pool->irqs);
519 mutex_destroy(&pool->lock);
520 kfree(pool->irqs_per_cpu);
524 static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec)
526 struct mlx5_irq_table *table = dev->priv.irq_table;
527 int num_sf_ctrl_by_msix;
528 int num_sf_ctrl_by_sfs;
533 table->pf_pool = irq_pool_alloc(dev, 0, pf_vec, NULL,
534 MLX5_EQ_SHARE_IRQ_MIN_COMP,
535 MLX5_EQ_SHARE_IRQ_MAX_COMP);
536 if (IS_ERR(table->pf_pool))
537 return PTR_ERR(table->pf_pool);
538 if (!mlx5_sf_max_functions(dev))
540 if (sf_vec < MLX5_IRQ_VEC_COMP_BASE_SF) {
541 mlx5_core_dbg(dev, "Not enught IRQs for SFs. SF may run at lower performance\n");
545 /* init sf_ctrl_pool */
546 num_sf_ctrl_by_msix = DIV_ROUND_UP(sf_vec, MLX5_COMP_EQS_PER_SF);
547 num_sf_ctrl_by_sfs = DIV_ROUND_UP(mlx5_sf_max_functions(dev),
548 MLX5_SFS_PER_CTRL_IRQ);
549 num_sf_ctrl = min_t(int, num_sf_ctrl_by_msix, num_sf_ctrl_by_sfs);
550 num_sf_ctrl = min_t(int, MLX5_IRQ_CTRL_SF_MAX, num_sf_ctrl);
551 table->sf_ctrl_pool = irq_pool_alloc(dev, pf_vec, num_sf_ctrl,
553 MLX5_EQ_SHARE_IRQ_MIN_CTRL,
554 MLX5_EQ_SHARE_IRQ_MAX_CTRL);
555 if (IS_ERR(table->sf_ctrl_pool)) {
556 err = PTR_ERR(table->sf_ctrl_pool);
559 /* init sf_comp_pool */
560 table->sf_comp_pool = irq_pool_alloc(dev, pf_vec + num_sf_ctrl,
561 sf_vec - num_sf_ctrl, "mlx5_sf_comp",
562 MLX5_EQ_SHARE_IRQ_MIN_COMP,
563 MLX5_EQ_SHARE_IRQ_MAX_COMP);
564 if (IS_ERR(table->sf_comp_pool)) {
565 err = PTR_ERR(table->sf_comp_pool);
569 table->sf_comp_pool->irqs_per_cpu = kcalloc(nr_cpu_ids, sizeof(u16), GFP_KERNEL);
570 if (!table->sf_comp_pool->irqs_per_cpu) {
572 goto err_irqs_per_cpu;
578 irq_pool_free(table->sf_comp_pool);
580 irq_pool_free(table->sf_ctrl_pool);
582 irq_pool_free(table->pf_pool);
586 static void irq_pools_destroy(struct mlx5_irq_table *table)
588 if (table->sf_ctrl_pool) {
589 irq_pool_free(table->sf_comp_pool);
590 irq_pool_free(table->sf_ctrl_pool);
592 irq_pool_free(table->pf_pool);
597 int mlx5_irq_table_init(struct mlx5_core_dev *dev)
599 struct mlx5_irq_table *irq_table;
601 if (mlx5_core_is_sf(dev))
604 irq_table = kvzalloc(sizeof(*irq_table), GFP_KERNEL);
608 dev->priv.irq_table = irq_table;
612 void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev)
614 if (mlx5_core_is_sf(dev))
617 kvfree(dev->priv.irq_table);
620 int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table)
622 if (!table->pf_pool->xa_num_irqs.max)
624 return table->pf_pool->xa_num_irqs.max - table->pf_pool->xa_num_irqs.min;
627 int mlx5_irq_table_create(struct mlx5_core_dev *dev)
629 int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
630 MLX5_CAP_GEN(dev, max_num_eqs) :
631 1 << MLX5_CAP_GEN(dev, log_max_eq);
636 if (mlx5_core_is_sf(dev))
639 pf_vec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + 1;
640 pf_vec = min_t(int, pf_vec, num_eqs);
643 if (mlx5_sf_max_functions(dev))
644 total_vec += MLX5_IRQ_CTRL_SF_MAX +
645 MLX5_COMP_EQS_PER_SF * mlx5_sf_max_functions(dev);
647 total_vec = pci_alloc_irq_vectors(dev->pdev, 1, total_vec, PCI_IRQ_MSIX);
650 pf_vec = min(pf_vec, total_vec);
652 err = irq_pools_init(dev, total_vec - pf_vec, pf_vec);
654 pci_free_irq_vectors(dev->pdev);
659 void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
661 struct mlx5_irq_table *table = dev->priv.irq_table;
663 if (mlx5_core_is_sf(dev))
666 /* There are cases where IRQs will still be in use when we reach
667 * here. Hence, make sure all the irqs are released.
669 irq_pools_destroy(table);
670 pci_free_irq_vectors(dev->pdev);
673 int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table)
675 if (table->sf_comp_pool)
676 return min_t(int, num_online_cpus(),
677 table->sf_comp_pool->xa_num_irqs.max -
678 table->sf_comp_pool->xa_num_irqs.min + 1);
680 return mlx5_irq_table_get_num_comp(table);
683 struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev)
685 #ifdef CONFIG_MLX5_SF
686 if (mlx5_core_is_sf(dev))
687 return dev->priv.parent_mdev->priv.irq_table;
689 return dev->priv.irq_table;