drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c

   1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
   2 /* Copyright (c) 2019 Mellanox Technologies. */
   3
   4 #include <linux/pci.h>
   5 #include <linux/interrupt.h>
   6 #include <linux/notifier.h>
   7 #include <linux/mlx5/driver.h>
   8 #include <linux/mlx5/vport.h>
   9 #include "mlx5_core.h"
  10 #include "mlx5_irq.h"
  11 #include "pci_irq.h"
  12 #include "lib/sf.h"
  13 #include "lib/eq.h"
  14 #ifdef CONFIG_RFS_ACCEL
  15 #include <linux/cpu_rmap.h>
  16 #endif
  17
  18 #define MLX5_SFS_PER_CTRL_IRQ 64
  19 #define MLX5_IRQ_CTRL_SF_MAX 8
  20 /* min num of vectors for SFs to be enabled */
  21 #define MLX5_IRQ_VEC_COMP_BASE_SF 2
  22
  23 #define MLX5_EQ_SHARE_IRQ_MAX_COMP (8)
  24 #define MLX5_EQ_SHARE_IRQ_MAX_CTRL (UINT_MAX)
  25 #define MLX5_EQ_SHARE_IRQ_MIN_COMP (1)
  26 #define MLX5_EQ_SHARE_IRQ_MIN_CTRL (4)
  27
/**
 * struct mlx5_irq - one interrupt vector tracked by an mlx5 IRQ pool
 * @nh: atomic notifier chain; passed as dev_id to request_irq() and called
 *      by irq_int_handler() every time the interrupt fires
 * @mask: affinity mask; filled from the caller's affinity descriptor when
 *        one is supplied to mlx5_irq_alloc()
 * @name: interrupt name handed to request_irq()
 * @pool: pool that owns this IRQ
 * @refcount: reference count; the get/put helpers in this file update it
 *            with @pool->lock held
 * @map: Linux IRQ number (virq) and MSI-X index of the vector
 * @pool_index: key under which this IRQ is stored in @pool->irqs
 */
struct mlx5_irq {
	struct atomic_notifier_head nh;
	cpumask_var_t mask;
	char name[MLX5_MAX_IRQ_NAME];
	struct mlx5_irq_pool *pool;
	int refcount;
	struct msi_map map;
	u32 pool_index;
};
  37
/**
 * struct mlx5_irq_table - the IRQ pools of one PCI function
 * @pcif_pool: pool for the PCI function's own IRQs; always created by
 *             irq_pools_init()
 * @sf_ctrl_pool: pool for SF control IRQs; only created when SFs are
 *                supported and enough vectors are available
 * @sf_comp_pool: pool for SF completion IRQs; created together with
 *                @sf_ctrl_pool
 */
struct mlx5_irq_table {
	struct mlx5_irq_pool *pcif_pool;
	struct mlx5_irq_pool *sf_ctrl_pool;
	struct mlx5_irq_pool *sf_comp_pool;
};
  43
  44 static int mlx5_core_func_to_vport(const struct mlx5_core_dev *dev,
  45                                    int func,
  46                                    bool ec_vf_func)
  47 {
  48         if (!ec_vf_func)
  49                 return func;
  50         return mlx5_core_ec_vf_vport_base(dev) + func - 1;
  51 }
  52
  53 /**
  54  * mlx5_get_default_msix_vec_count - Get the default number of MSI-X vectors
  55  *                                   to be ssigned to each VF.
  56  * @dev: PF to work on
  57  * @num_vfs: Number of enabled VFs
  58  */
  59 int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs)
  60 {
  61         int num_vf_msix, min_msix, max_msix;
  62
  63         num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
  64         if (!num_vf_msix)
  65                 return 0;
  66
  67         min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
  68         max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);
  69
  70         /* Limit maximum number of MSI-X vectors so the default configuration
  71          * has some available in the pool. This will allow the user to increase
  72          * the number of vectors in a VF without having to first size-down other
  73          * VFs.
  74          */
  75         return max(min(num_vf_msix / num_vfs, max_msix / 2), min_msix);
  76 }
  77
  78 /**
  79  * mlx5_set_msix_vec_count - Set dynamically allocated MSI-X on the VF
  80  * @dev: PF to work on
  81  * @function_id: Internal PCI VF function IDd
  82  * @msix_vec_count: Number of MSI-X vectors to set
  83  */
  84 int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id,
  85                             int msix_vec_count)
  86 {
  87         int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
  88         int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
  89         void *hca_cap = NULL, *query_cap = NULL, *cap;
  90         int num_vf_msix, min_msix, max_msix;
  91         bool ec_vf_function;
  92         int vport;
  93         int ret;
  94
  95         num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
  96         if (!num_vf_msix)
  97                 return 0;
  98
  99         if (!MLX5_CAP_GEN(dev, vport_group_manager) || !mlx5_core_is_pf(dev))
 100                 return -EOPNOTSUPP;
 101
 102         min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
 103         max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);
 104
 105         if (msix_vec_count < min_msix)
 106                 return -EINVAL;
 107
 108         if (msix_vec_count > max_msix)
 109                 return -EOVERFLOW;
 110
 111         query_cap = kvzalloc(query_sz, GFP_KERNEL);
 112         hca_cap = kvzalloc(set_sz, GFP_KERNEL);
 113         if (!hca_cap || !query_cap) {
 114                 ret = -ENOMEM;
 115                 goto out;
 116         }
 117
 118         ec_vf_function = mlx5_core_ec_sriov_enabled(dev);
 119         vport = mlx5_core_func_to_vport(dev, function_id, ec_vf_function);
 120         ret = mlx5_vport_get_other_func_general_cap(dev, vport, query_cap);
 121         if (ret)
 122                 goto out;
 123
 124         cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability);
 125         memcpy(cap, MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability),
 126                MLX5_UN_SZ_BYTES(hca_cap_union));
 127         MLX5_SET(cmd_hca_cap, cap, dynamic_msix_table_size, msix_vec_count);
 128
 129         MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP);
 130         MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1);
 131         MLX5_SET(set_hca_cap_in, hca_cap, ec_vf_function, ec_vf_function);
 132         MLX5_SET(set_hca_cap_in, hca_cap, function_id, function_id);
 133
 134         MLX5_SET(set_hca_cap_in, hca_cap, op_mod,
 135                  MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1);
 136         ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap);
 137 out:
 138         kvfree(hca_cap);
 139         kvfree(query_cap);
 140         return ret;
 141 }
 142
 143 /* mlx5_system_free_irq - Free an IRQ
 144  * @irq: IRQ to free
 145  *
 146  * Free the IRQ and other resources such as rmap from the system.
 147  * BUT doesn't free or remove reference from mlx5.
 148  * This function is very important for the shutdown flow, where we need to
 149  * cleanup system resoruces but keep mlx5 objects alive,
 150  * see mlx5_irq_table_free_irqs().
 151  */
 152 static void mlx5_system_free_irq(struct mlx5_irq *irq)
 153 {
 154         struct mlx5_irq_pool *pool = irq->pool;
 155 #ifdef CONFIG_RFS_ACCEL
 156         struct cpu_rmap *rmap;
 157 #endif
 158
 159         /* free_irq requires that affinity_hint and rmap will be cleared before
 160          * calling it. To satisfy this requirement, we call
 161          * irq_cpu_rmap_remove() to remove the notifier
 162          */
 163         irq_update_affinity_hint(irq->map.virq, NULL);
 164 #ifdef CONFIG_RFS_ACCEL
 165         rmap = mlx5_eq_table_get_rmap(pool->dev);
 166         if (rmap)
 167                 irq_cpu_rmap_remove(rmap, irq->map.virq);
 168 #endif
 169
 170         free_irq(irq->map.virq, &irq->nh);
 171         if (irq->map.index && pci_msix_can_alloc_dyn(pool->dev->pdev))
 172                 pci_msix_free_irq(pool->dev->pdev, irq->map);
 173 }
 174
 175 static void irq_release(struct mlx5_irq *irq)
 176 {
 177         struct mlx5_irq_pool *pool = irq->pool;
 178
 179         xa_erase(&pool->irqs, irq->pool_index);
 180         mlx5_system_free_irq(irq);
 181         free_cpumask_var(irq->mask);
 182         kfree(irq);
 183 }
 184
 185 int mlx5_irq_put(struct mlx5_irq *irq)
 186 {
 187         struct mlx5_irq_pool *pool = irq->pool;
 188         int ret = 0;
 189
 190         mutex_lock(&pool->lock);
 191         irq->refcount--;
 192         if (!irq->refcount) {
 193                 irq_release(irq);
 194                 ret = 1;
 195         }
 196         mutex_unlock(&pool->lock);
 197         return ret;
 198 }
 199
 200 int mlx5_irq_read_locked(struct mlx5_irq *irq)
 201 {
 202         lockdep_assert_held(&irq->pool->lock);
 203         return irq->refcount;
 204 }
 205
 206 int mlx5_irq_get_locked(struct mlx5_irq *irq)
 207 {
 208         lockdep_assert_held(&irq->pool->lock);
 209         if (WARN_ON_ONCE(!irq->refcount))
 210                 return 0;
 211         irq->refcount++;
 212         return 1;
 213 }
 214
 215 static int irq_get(struct mlx5_irq *irq)
 216 {
 217         int err;
 218
 219         mutex_lock(&irq->pool->lock);
 220         err = mlx5_irq_get_locked(irq);
 221         mutex_unlock(&irq->pool->lock);
 222         return err;
 223 }
 224
 225 static irqreturn_t irq_int_handler(int irq, void *nh)
 226 {
 227         atomic_notifier_call_chain(nh, 0, NULL);
 228         return IRQ_HANDLED;
 229 }
 230
 231 static void irq_sf_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
 232 {
 233         snprintf(name, MLX5_MAX_IRQ_NAME, "%s%d", pool->name, vecidx);
 234 }
 235
 236 static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
 237 {
 238         if (!pool->xa_num_irqs.max) {
 239                 /* in case we only have a single irq for the device */
 240                 snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_combined%d", vecidx);
 241                 return;
 242         }
 243
 244         if (!vecidx) {
 245                 snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async%d", vecidx);
 246                 return;
 247         }
 248
 249         snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx);
 250 }
 251
 252 struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
 253                                 struct irq_affinity_desc *af_desc,
 254                                 struct cpu_rmap **rmap)
 255 {
 256         struct mlx5_core_dev *dev = pool->dev;
 257         char name[MLX5_MAX_IRQ_NAME];
 258         struct mlx5_irq *irq;
 259         int err;
 260
 261         irq = kzalloc(sizeof(*irq), GFP_KERNEL);
 262         if (!irq || !zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) {
 263                 kfree(irq);
 264                 return ERR_PTR(-ENOMEM);
 265         }
 266
 267         if (!i || !pci_msix_can_alloc_dyn(dev->pdev)) {
 268                 /* The vector at index 0 is always statically allocated. If
 269                  * dynamic irq is not supported all vectors are statically
 270                  * allocated. In both cases just get the irq number and set
 271                  * the index.
 272                  */
 273                 irq->map.virq = pci_irq_vector(dev->pdev, i);
 274                 irq->map.index = i;
 275         } else {
 276                 irq->map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, af_desc);
 277                 if (!irq->map.virq) {
 278                         err = irq->map.index;
 279                         goto err_alloc_irq;
 280                 }
 281         }
 282
 283         if (i && rmap && *rmap) {
 284 #ifdef CONFIG_RFS_ACCEL
 285                 err = irq_cpu_rmap_add(*rmap, irq->map.virq);
 286                 if (err)
 287                         goto err_irq_rmap;
 288 #endif
 289         }
 290         if (!mlx5_irq_pool_is_sf_pool(pool))
 291                 irq_set_name(pool, name, i);
 292         else
 293                 irq_sf_set_name(pool, name, i);
 294         ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
 295         snprintf(irq->name, MLX5_MAX_IRQ_NAME,
 296                  "%s@pci:%s", name, pci_name(dev->pdev));
 297         err = request_irq(irq->map.virq, irq_int_handler, 0, irq->name,
 298                           &irq->nh);
 299         if (err) {
 300                 mlx5_core_err(dev, "Failed to request irq. err = %d\n", err);
 301                 goto err_req_irq;
 302         }
 303
 304         if (af_desc) {
 305                 cpumask_copy(irq->mask, &af_desc->mask);
 306                 irq_set_affinity_and_hint(irq->map.virq, irq->mask);
 307         }
 308         irq->pool = pool;
 309         irq->refcount = 1;
 310         irq->pool_index = i;
 311         err = xa_err(xa_store(&pool->irqs, irq->pool_index, irq, GFP_KERNEL));
 312         if (err) {
 313                 mlx5_core_err(dev, "Failed to alloc xa entry for irq(%u). err = %d\n",
 314                               irq->pool_index, err);
 315                 goto err_xa;
 316         }
 317         return irq;
 318 err_xa:
 319         if (af_desc)
 320                 irq_update_affinity_hint(irq->map.virq, NULL);
 321         free_irq(irq->map.virq, &irq->nh);
 322 err_req_irq:
 323 #ifdef CONFIG_RFS_ACCEL
 324         if (i && rmap && *rmap) {
 325                 free_irq_cpu_rmap(*rmap);
 326                 *rmap = NULL;
 327         }
 328 err_irq_rmap:
 329 #endif
 330         if (i && pci_msix_can_alloc_dyn(dev->pdev))
 331                 pci_msix_free_irq(dev->pdev, irq->map);
 332 err_alloc_irq:
 333         free_cpumask_var(irq->mask);
 334         kfree(irq);
 335         return ERR_PTR(err);
 336 }
 337
 338 int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
 339 {
 340         int ret;
 341
 342         ret = irq_get(irq);
 343         if (!ret)
 344                 /* Something very bad happens here, we are enabling EQ
 345                  * on non-existing IRQ.
 346                  */
 347                 return -ENOENT;
 348         ret = atomic_notifier_chain_register(&irq->nh, nb);
 349         if (ret)
 350                 mlx5_irq_put(irq);
 351         return ret;
 352 }
 353
 354 int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
 355 {
 356         int err = 0;
 357
 358         err = atomic_notifier_chain_unregister(&irq->nh, nb);
 359         mlx5_irq_put(irq);
 360         return err;
 361 }
 362
 363 struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq)
 364 {
 365         return irq->mask;
 366 }
 367
 368 int mlx5_irq_get_index(struct mlx5_irq *irq)
 369 {
 370         return irq->map.index;
 371 }
 372
 373 /* irq_pool API */
 374
 375 /* requesting an irq from a given pool according to given index */
 376 static struct mlx5_irq *
 377 irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx,
 378                         struct irq_affinity_desc *af_desc,
 379                         struct cpu_rmap **rmap)
 380 {
 381         struct mlx5_irq *irq;
 382
 383         mutex_lock(&pool->lock);
 384         irq = xa_load(&pool->irqs, vecidx);
 385         if (irq) {
 386                 mlx5_irq_get_locked(irq);
 387                 goto unlock;
 388         }
 389         irq = mlx5_irq_alloc(pool, vecidx, af_desc, rmap);
 390 unlock:
 391         mutex_unlock(&pool->lock);
 392         return irq;
 393 }
 394
 395 static struct mlx5_irq_pool *sf_ctrl_irq_pool_get(struct mlx5_irq_table *irq_table)
 396 {
 397         return irq_table->sf_ctrl_pool;
 398 }
 399
 400 static struct mlx5_irq_pool *sf_irq_pool_get(struct mlx5_irq_table *irq_table)
 401 {
 402         return irq_table->sf_comp_pool;
 403 }
 404
 405 struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev)
 406 {
 407         struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
 408         struct mlx5_irq_pool *pool = NULL;
 409
 410         if (mlx5_core_is_sf(dev))
 411                 pool = sf_irq_pool_get(irq_table);
 412
 413         /* In some configs, there won't be a pool of SFs IRQs. Hence, returning
 414          * the PF IRQs pool in case the SF pool doesn't exist.
 415          */
 416         return pool ? pool : irq_table->pcif_pool;
 417 }
 418
 419 static struct mlx5_irq_pool *ctrl_irq_pool_get(struct mlx5_core_dev *dev)
 420 {
 421         struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
 422         struct mlx5_irq_pool *pool = NULL;
 423
 424         if (mlx5_core_is_sf(dev))
 425                 pool = sf_ctrl_irq_pool_get(irq_table);
 426
 427         /* In some configs, there won't be a pool of SFs IRQs. Hence, returning
 428          * the PF IRQs pool in case the SF pool doesn't exist.
 429          */
 430         return pool ? pool : irq_table->pcif_pool;
 431 }
 432
 433 static void _mlx5_irq_release(struct mlx5_irq *irq)
 434 {
 435         synchronize_irq(irq->map.virq);
 436         mlx5_irq_put(irq);
 437 }
 438
 439 /**
 440  * mlx5_ctrl_irq_release - release a ctrl IRQ back to the system.
 441  * @ctrl_irq: ctrl IRQ to be released.
 442  */
 443 void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq)
 444 {
 445         _mlx5_irq_release(ctrl_irq);
 446 }
 447
 448 /**
 449  * mlx5_ctrl_irq_request - request a ctrl IRQ for mlx5 device.
 450  * @dev: mlx5 device that requesting the IRQ.
 451  *
 452  * This function returns a pointer to IRQ, or ERR_PTR in case of error.
 453  */
 454 struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev)
 455 {
 456         struct mlx5_irq_pool *pool = ctrl_irq_pool_get(dev);
 457         struct irq_affinity_desc af_desc;
 458         struct mlx5_irq *irq;
 459
 460         cpumask_copy(&af_desc.mask, cpu_online_mask);
 461         af_desc.is_managed = false;
 462         if (!mlx5_irq_pool_is_sf_pool(pool)) {
 463                 /* In case we are allocating a control IRQ from a pci device's pool.
 464                  * This can happen also for a SF if the SFs pool is empty.
 465                  */
 466                 if (!pool->xa_num_irqs.max) {
 467                         cpumask_clear(&af_desc.mask);
 468                         /* In case we only have a single IRQ for PF/VF */
 469                         cpumask_set_cpu(cpumask_first(cpu_online_mask), &af_desc.mask);
 470                 }
 471                 /* Allocate the IRQ in index 0. The vector was already allocated */
 472                 irq = irq_pool_request_vector(pool, 0, &af_desc, NULL);
 473         } else {
 474                 irq = mlx5_irq_affinity_request(pool, &af_desc);
 475         }
 476
 477         return irq;
 478 }
 479
 480 /**
 481  * mlx5_irq_request - request an IRQ for mlx5 PF/VF device.
 482  * @dev: mlx5 device that requesting the IRQ.
 483  * @vecidx: vector index of the IRQ. This argument is ignore if affinity is
 484  * provided.
 485  * @af_desc: affinity descriptor for this IRQ.
 486  * @rmap: pointer to reverse map pointer for completion interrupts
 487  *
 488  * This function returns a pointer to IRQ, or ERR_PTR in case of error.
 489  */
 490 struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
 491                                   struct irq_affinity_desc *af_desc,
 492                                   struct cpu_rmap **rmap)
 493 {
 494         struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
 495         struct mlx5_irq_pool *pool;
 496         struct mlx5_irq *irq;
 497
 498         pool = irq_table->pcif_pool;
 499         irq = irq_pool_request_vector(pool, vecidx, af_desc, rmap);
 500         if (IS_ERR(irq))
 501                 return irq;
 502         mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n",
 503                       irq->map.virq, cpumask_pr_args(&af_desc->mask),
 504                       irq->refcount / MLX5_EQ_REFS_PER_IRQ);
 505         return irq;
 506 }
 507
 508 /**
 509  * mlx5_msix_alloc - allocate msix interrupt
 510  * @dev: mlx5 device from which to request
 511  * @handler: interrupt handler
 512  * @affdesc: affinity descriptor
 513  * @name: interrupt name
 514  *
 515  * Returns: struct msi_map with result encoded.
 516  * Note: the caller must make sure to release the irq by calling
 517  *       mlx5_msix_free() if shutdown was initiated.
 518  */
 519 struct msi_map mlx5_msix_alloc(struct mlx5_core_dev *dev,
 520                                irqreturn_t (*handler)(int, void *),
 521                                const struct irq_affinity_desc *affdesc,
 522                                const char *name)
 523 {
 524         struct msi_map map;
 525         int err;
 526
 527         if (!dev->pdev) {
 528                 map.virq = 0;
 529                 map.index = -EINVAL;
 530                 return map;
 531         }
 532
 533         map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, affdesc);
 534         if (!map.virq)
 535                 return map;
 536
 537         err = request_irq(map.virq, handler, 0, name, NULL);
 538         if (err) {
 539                 mlx5_core_warn(dev, "err %d\n", err);
 540                 pci_msix_free_irq(dev->pdev, map);
 541                 map.virq = 0;
 542                 map.index = -ENOMEM;
 543         }
 544         return map;
 545 }
 546 EXPORT_SYMBOL(mlx5_msix_alloc);
 547
 548 /**
 549  * mlx5_msix_free - free a previously allocated msix interrupt
 550  * @dev: mlx5 device associated with interrupt
 551  * @map: map previously returned by mlx5_msix_alloc()
 552  */
 553 void mlx5_msix_free(struct mlx5_core_dev *dev, struct msi_map map)
 554 {
 555         free_irq(map.virq, NULL);
 556         pci_msix_free_irq(dev->pdev, map);
 557 }
 558 EXPORT_SYMBOL(mlx5_msix_free);
 559
 560 /**
 561  * mlx5_irq_release_vector - release one IRQ back to the system.
 562  * @irq: the irq to release.
 563  */
 564 void mlx5_irq_release_vector(struct mlx5_irq *irq)
 565 {
 566         _mlx5_irq_release(irq);
 567 }
 568
 569 /**
 570  * mlx5_irq_request_vector - request one IRQ for mlx5 device.
 571  * @dev: mlx5 device that is requesting the IRQ.
 572  * @cpu: CPU to bind the IRQ to.
 573  * @vecidx: vector index to request an IRQ for.
 574  * @rmap: pointer to reverse map pointer for completion interrupts
 575  *
 576  * Each IRQ is bound to at most 1 CPU.
 577  * This function is requests one IRQ, for the given @vecidx.
 578  *
 579  * This function returns a pointer to the irq on success, or an error pointer
 580  * in case of an error.
 581  */
 582 struct mlx5_irq *mlx5_irq_request_vector(struct mlx5_core_dev *dev, u16 cpu,
 583                                          u16 vecidx, struct cpu_rmap **rmap)
 584 {
 585         struct mlx5_irq_table *table = mlx5_irq_table_get(dev);
 586         struct mlx5_irq_pool *pool = table->pcif_pool;
 587         struct irq_affinity_desc af_desc;
 588         int offset = 1;
 589
 590         if (!pool->xa_num_irqs.max)
 591                 offset = 0;
 592
 593         af_desc.is_managed = false;
 594         cpumask_clear(&af_desc.mask);
 595         cpumask_set_cpu(cpu, &af_desc.mask);
 596         return mlx5_irq_request(dev, vecidx + offset, &af_desc, rmap);
 597 }
 598
 599 static struct mlx5_irq_pool *
 600 irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name,
 601                u32 min_threshold, u32 max_threshold)
 602 {
 603         struct mlx5_irq_pool *pool = kvzalloc(sizeof(*pool), GFP_KERNEL);
 604
 605         if (!pool)
 606                 return ERR_PTR(-ENOMEM);
 607         pool->dev = dev;
 608         mutex_init(&pool->lock);
 609         xa_init_flags(&pool->irqs, XA_FLAGS_ALLOC);
 610         pool->xa_num_irqs.min = start;
 611         pool->xa_num_irqs.max = start + size - 1;
 612         if (name)
 613                 snprintf(pool->name, MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS,
 614                          "%s", name);
 615         pool->min_threshold = min_threshold * MLX5_EQ_REFS_PER_IRQ;
 616         pool->max_threshold = max_threshold * MLX5_EQ_REFS_PER_IRQ;
 617         mlx5_core_dbg(dev, "pool->name = %s, pool->size = %d, pool->start = %d",
 618                       name, size, start);
 619         return pool;
 620 }
 621
 622 static void irq_pool_free(struct mlx5_irq_pool *pool)
 623 {
 624         struct mlx5_irq *irq;
 625         unsigned long index;
 626
 627         /* There are cases in which we are destrying the irq_table before
 628          * freeing all the IRQs, fast teardown for example. Hence, free the irqs
 629          * which might not have been freed.
 630          */
 631         xa_for_each(&pool->irqs, index, irq)
 632                 irq_release(irq);
 633         xa_destroy(&pool->irqs);
 634         mutex_destroy(&pool->lock);
 635         kfree(pool->irqs_per_cpu);
 636         kvfree(pool);
 637 }
 638
 639 static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pcif_vec)
 640 {
 641         struct mlx5_irq_table *table = dev->priv.irq_table;
 642         int num_sf_ctrl_by_msix;
 643         int num_sf_ctrl_by_sfs;
 644         int num_sf_ctrl;
 645         int err;
 646
 647         /* init pcif_pool */
 648         table->pcif_pool = irq_pool_alloc(dev, 0, pcif_vec, NULL,
 649                                           MLX5_EQ_SHARE_IRQ_MIN_COMP,
 650                                           MLX5_EQ_SHARE_IRQ_MAX_COMP);
 651         if (IS_ERR(table->pcif_pool))
 652                 return PTR_ERR(table->pcif_pool);
 653         if (!mlx5_sf_max_functions(dev))
 654                 return 0;
 655         if (sf_vec < MLX5_IRQ_VEC_COMP_BASE_SF) {
 656                 mlx5_core_dbg(dev, "Not enught IRQs for SFs. SF may run at lower performance\n");
 657                 return 0;
 658         }
 659
 660         /* init sf_ctrl_pool */
 661         num_sf_ctrl_by_msix = DIV_ROUND_UP(sf_vec, MLX5_COMP_EQS_PER_SF);
 662         num_sf_ctrl_by_sfs = DIV_ROUND_UP(mlx5_sf_max_functions(dev),
 663                                           MLX5_SFS_PER_CTRL_IRQ);
 664         num_sf_ctrl = min_t(int, num_sf_ctrl_by_msix, num_sf_ctrl_by_sfs);
 665         num_sf_ctrl = min_t(int, MLX5_IRQ_CTRL_SF_MAX, num_sf_ctrl);
 666         table->sf_ctrl_pool = irq_pool_alloc(dev, pcif_vec, num_sf_ctrl,
 667                                              "mlx5_sf_ctrl",
 668                                              MLX5_EQ_SHARE_IRQ_MIN_CTRL,
 669                                              MLX5_EQ_SHARE_IRQ_MAX_CTRL);
 670         if (IS_ERR(table->sf_ctrl_pool)) {
 671                 err = PTR_ERR(table->sf_ctrl_pool);
 672                 goto err_pf;
 673         }
 674         /* init sf_comp_pool */
 675         table->sf_comp_pool = irq_pool_alloc(dev, pcif_vec + num_sf_ctrl,
 676                                              sf_vec - num_sf_ctrl, "mlx5_sf_comp",
 677                                              MLX5_EQ_SHARE_IRQ_MIN_COMP,
 678                                              MLX5_EQ_SHARE_IRQ_MAX_COMP);
 679         if (IS_ERR(table->sf_comp_pool)) {
 680                 err = PTR_ERR(table->sf_comp_pool);
 681                 goto err_sf_ctrl;
 682         }
 683
 684         table->sf_comp_pool->irqs_per_cpu = kcalloc(nr_cpu_ids, sizeof(u16), GFP_KERNEL);
 685         if (!table->sf_comp_pool->irqs_per_cpu) {
 686                 err = -ENOMEM;
 687                 goto err_irqs_per_cpu;
 688         }
 689
 690         return 0;
 691
 692 err_irqs_per_cpu:
 693         irq_pool_free(table->sf_comp_pool);
 694 err_sf_ctrl:
 695         irq_pool_free(table->sf_ctrl_pool);
 696 err_pf:
 697         irq_pool_free(table->pcif_pool);
 698         return err;
 699 }
 700
 701 static void irq_pools_destroy(struct mlx5_irq_table *table)
 702 {
 703         if (table->sf_ctrl_pool) {
 704                 irq_pool_free(table->sf_comp_pool);
 705                 irq_pool_free(table->sf_ctrl_pool);
 706         }
 707         irq_pool_free(table->pcif_pool);
 708 }
 709
 710 static void mlx5_irq_pool_free_irqs(struct mlx5_irq_pool *pool)
 711 {
 712         struct mlx5_irq *irq;
 713         unsigned long index;
 714
 715         xa_for_each(&pool->irqs, index, irq)
 716                 mlx5_system_free_irq(irq);
 717
 718 }
 719
 720 static void mlx5_irq_pools_free_irqs(struct mlx5_irq_table *table)
 721 {
 722         if (table->sf_ctrl_pool) {
 723                 mlx5_irq_pool_free_irqs(table->sf_comp_pool);
 724                 mlx5_irq_pool_free_irqs(table->sf_ctrl_pool);
 725         }
 726         mlx5_irq_pool_free_irqs(table->pcif_pool);
 727 }
 728
 729 /* irq_table API */
 730
 731 int mlx5_irq_table_init(struct mlx5_core_dev *dev)
 732 {
 733         struct mlx5_irq_table *irq_table;
 734
 735         if (mlx5_core_is_sf(dev))
 736                 return 0;
 737
 738         irq_table = kvzalloc_node(sizeof(*irq_table), GFP_KERNEL,
 739                                   dev->priv.numa_node);
 740         if (!irq_table)
 741                 return -ENOMEM;
 742
 743         dev->priv.irq_table = irq_table;
 744         return 0;
 745 }
 746
 747 void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev)
 748 {
 749         if (mlx5_core_is_sf(dev))
 750                 return;
 751
 752         kvfree(dev->priv.irq_table);
 753 }
 754
 755 int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table)
 756 {
 757         if (!table->pcif_pool->xa_num_irqs.max)
 758                 return 1;
 759         return table->pcif_pool->xa_num_irqs.max - table->pcif_pool->xa_num_irqs.min;
 760 }
 761
 762 int mlx5_irq_table_create(struct mlx5_core_dev *dev)
 763 {
 764         int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
 765                       MLX5_CAP_GEN(dev, max_num_eqs) :
 766                       1 << MLX5_CAP_GEN(dev, log_max_eq);
 767         int total_vec;
 768         int pcif_vec;
 769         int req_vec;
 770         int err;
 771         int n;
 772
 773         if (mlx5_core_is_sf(dev))
 774                 return 0;
 775
 776         pcif_vec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + 1;
 777         pcif_vec = min_t(int, pcif_vec, num_eqs);
 778
 779         total_vec = pcif_vec;
 780         if (mlx5_sf_max_functions(dev))
 781                 total_vec += MLX5_IRQ_CTRL_SF_MAX +
 782                         MLX5_COMP_EQS_PER_SF * mlx5_sf_max_functions(dev);
 783         total_vec = min_t(int, total_vec, pci_msix_vec_count(dev->pdev));
 784         pcif_vec = min_t(int, pcif_vec, pci_msix_vec_count(dev->pdev));
 785
 786         req_vec = pci_msix_can_alloc_dyn(dev->pdev) ? 1 : total_vec;
 787         n = pci_alloc_irq_vectors(dev->pdev, 1, req_vec, PCI_IRQ_MSIX);
 788         if (n < 0)
 789                 return n;
 790
 791         err = irq_pools_init(dev, total_vec - pcif_vec, pcif_vec);
 792         if (err)
 793                 pci_free_irq_vectors(dev->pdev);
 794
 795         return err;
 796 }
 797
 798 void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
 799 {
 800         struct mlx5_irq_table *table = dev->priv.irq_table;
 801
 802         if (mlx5_core_is_sf(dev))
 803                 return;
 804
 805         /* There are cases where IRQs still will be in used when we reaching
 806          * to here. Hence, making sure all the irqs are released.
 807          */
 808         irq_pools_destroy(table);
 809         pci_free_irq_vectors(dev->pdev);
 810 }
 811
 812 void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev)
 813 {
 814         struct mlx5_irq_table *table = dev->priv.irq_table;
 815
 816         if (mlx5_core_is_sf(dev))
 817                 return;
 818
 819         mlx5_irq_pools_free_irqs(table);
 820         pci_free_irq_vectors(dev->pdev);
 821 }
 822
 823 int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table)
 824 {
 825         if (table->sf_comp_pool)
 826                 return min_t(int, num_online_cpus(),
 827                              table->sf_comp_pool->xa_num_irqs.max -
 828                              table->sf_comp_pool->xa_num_irqs.min + 1);
 829         else
 830                 return mlx5_irq_table_get_num_comp(table);
 831 }
 832
 833 struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev)
 834 {
 835 #ifdef CONFIG_MLX5_SF
 836         if (mlx5_core_is_sf(dev))
 837                 return dev->priv.parent_mdev->priv.irq_table;
 838 #endif
 839         return dev->priv.irq_table;
 840 }