Merge branch 'mlx5-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mellanox...
author Saeed Mahameed <saeedm@mellanox.com>
Fri, 28 Jun 2019 22:49:59 +0000 (15:49 -0700)
committer Saeed Mahameed <saeedm@mellanox.com>
Fri, 28 Jun 2019 23:03:54 +0000 (16:03 -0700)
Misc updates from mlx5-next branch:

1) E-Switch vport metadata support for source vport matching
2) Convert mkey_table to XArray
3) Shared IRQs, using a single IRQ for all async EQs

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
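
Sketches of the main items are interleaved below at the relevant files.
First, item 2: the rwlock-protected radix tree behind priv.mkey_table
becomes an XArray, whose internal lock replaces the external one (see the
cq.c, devx.c, mr.c and odp.c hunks). A minimal sketch of the before/after
contract; the mkey_demo_* names are hypothetical, not from this series:

#include <linux/xarray.h>

struct mkey_demo_table {
	struct xarray mkeys;	/* was: struct radix_tree_root + rwlock_t */
};

static void mkey_demo_init(struct mkey_demo_table *t)
{
	xa_init(&t->mkeys);	/* was: INIT_RADIX_TREE() + rwlock_init() */
}

/* was: write_lock_irqsave(); radix_tree_insert(); write_unlock_irqrestore() */
static int mkey_demo_insert(struct mkey_demo_table *t, unsigned long base_key,
			    void *mkey)
{
	return xa_err(xa_store(&t->mkeys, base_key, mkey, GFP_KERNEL));
}

/* was: read_lock(); __mlx5_mr_lookup(); read_unlock() */
static void *mkey_demo_lookup(struct mkey_demo_table *t, unsigned long base_key)
{
	return xa_load(&t->mkeys, base_key);
}

/* In atomic context (reg_mr_callback in the mr.c hunk below), the series
 * instead holds the XArray lock explicitly:
 *	xa_lock_irqsave(mkeys, flags);
 *	err = xa_err(__xa_store(mkeys, base_key, mkey, GFP_ATOMIC));
 *	xa_unlock_irqrestore(mkeys, flags);
 */
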
33 files changed:
drivers/infiniband/hw/mlx5/cq.c
drivers/infiniband/hw/mlx5/devx.c
drivers/infiniband/hw/mlx5/flow.c
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/mlx5_ib.h
drivers/infiniband/hw/mlx5/mr.c
drivers/infiniband/hw/mlx5/odp.c
drivers/net/ethernet/mellanox/mlx5/core/Makefile
drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/eq.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
drivers/net/ethernet/mellanox/mlx5/core/mr.c
drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/sriov.c
include/linux/mlx5/driver.h
include/linux/mlx5/eq.h
include/linux/mlx5/eswitch.h
include/linux/mlx5/fs.h
include/linux/mlx5/mlx5_ifc.h
include/linux/mlx5/qp.h
include/net/devlink.h
net/core/devlink.c

index 2e2e65f..0220736 100644
@@ -522,9 +522,9 @@ repoll:
        case MLX5_CQE_SIG_ERR:
                sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64;
 
-               read_lock(&dev->mdev->priv.mkey_table.lock);
-               mmkey = __mlx5_mr_lookup(dev->mdev,
-                                        mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
+               xa_lock(&dev->mdev->priv.mkey_table);
+               mmkey = xa_load(&dev->mdev->priv.mkey_table,
+                               mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
                mr = to_mibmr(mmkey);
                get_sig_err_item(sig_err_cqe, &mr->sig->err_item);
                mr->sig->sig_err_exists = true;
@@ -537,7 +537,7 @@ repoll:
                             mr->sig->err_item.expected,
                             mr->sig->err_item.actual);
 
-               read_unlock(&dev->mdev->priv.mkey_table.lock);
+               xa_unlock(&dev->mdev->priv.mkey_table);
                goto repoll;
        }
 
index 80b42d0..931f587 100644
@@ -1043,13 +1043,10 @@ static int devx_handle_mkey_indirect(struct devx_obj *obj,
                                     struct mlx5_ib_dev *dev,
                                     void *in, void *out)
 {
-       struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table;
        struct mlx5_ib_devx_mr *devx_mr = &obj->devx_mr;
-       unsigned long flags;
        struct mlx5_core_mkey *mkey;
        void *mkc;
        u8 key;
-       int err;
 
        mkey = &devx_mr->mmkey;
        mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
@@ -1062,11 +1059,8 @@ static int devx_handle_mkey_indirect(struct devx_obj *obj,
        mkey->pd = MLX5_GET(mkc, mkc, pd);
        devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size);
 
-       write_lock_irqsave(&table->lock, flags);
-       err = radix_tree_insert(&table->tree, mlx5_base_mkey(mkey->key),
-                               mkey);
-       write_unlock_irqrestore(&table->lock, flags);
-       return err;
+       return xa_err(xa_store(&dev->mdev->priv.mkey_table,
+                              mlx5_base_mkey(mkey->key), mkey, GFP_KERNEL));
 }
 
 static int devx_handle_mkey_create(struct mlx5_ib_dev *dev,
@@ -1117,12 +1111,8 @@ static void devx_free_indirect_mkey(struct rcu_head *rcu)
  */
 static void devx_cleanup_mkey(struct devx_obj *obj)
 {
-       struct mlx5_mkey_table *table = &obj->mdev->priv.mkey_table;
-       unsigned long flags;
-
-       write_lock_irqsave(&table->lock, flags);
-       radix_tree_delete(&table->tree, mlx5_base_mkey(obj->devx_mr.mmkey.key));
-       write_unlock_irqrestore(&table->lock, flags);
+       xa_erase(&obj->mdev->priv.mkey_table,
+                mlx5_base_mkey(obj->devx_mr.mmkey.key));
 }
 
 static int devx_obj_cleanup(struct ib_uobject *uobject,
index 1fc302d..b884135 100644
@@ -65,11 +65,12 @@ static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
 static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
        struct uverbs_attr_bundle *attrs)
 {
-       struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
+       struct mlx5_flow_context flow_context = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
        struct mlx5_ib_flow_handler *flow_handler;
        struct mlx5_ib_flow_matcher *fs_matcher;
        struct ib_uobject **arr_flow_actions;
        struct ib_uflow_resources *uflow_res;
+       struct mlx5_flow_act flow_act = {};
        void *devx_obj;
        int dest_id, dest_type;
        void *cmd_in;
@@ -172,17 +173,19 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
                                   arr_flow_actions[i]->object);
        }
 
-       ret = uverbs_copy_from(&flow_act.flow_tag, attrs,
+       ret = uverbs_copy_from(&flow_context.flow_tag, attrs,
                               MLX5_IB_ATTR_CREATE_FLOW_TAG);
        if (!ret) {
-               if (flow_act.flow_tag >= BIT(24)) {
+               if (flow_context.flow_tag >= BIT(24)) {
                        ret = -EINVAL;
                        goto err_out;
                }
-               flow_act.flags |= FLOW_ACT_HAS_TAG;
+               flow_context.flags |= FLOW_CONTEXT_HAS_TAG;
        }
 
-       flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, &flow_act,
+       flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher,
+                                              &flow_context,
+                                              &flow_act,
                                               counter_id,
                                               cmd_in, inlen,
                                               dest_id, dest_type);
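
The flow.c hunk above tracks a series-wide change: flow_tag moves out of
struct mlx5_flow_act into the new struct mlx5_flow_context embedded in
struct mlx5_flow_spec. A rough sketch of the resulting consumer pattern,
assuming a zero-initialized spec (demo_add_tagged_rule is hypothetical):

#include <linux/mlx5/fs.h>

static struct mlx5_flow_handle *
demo_add_tagged_rule(struct mlx5_flow_table *ft, struct mlx5_flow_spec *spec,
		     struct mlx5_flow_destination *dst, u32 tag)
{
	struct mlx5_flow_act flow_act = {
		.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
	};

	/* the tag now travels with the match spec, not with the action */
	spec->flow_context.flow_tag = tag;
	spec->flow_context.flags |= FLOW_CONTEXT_HAS_TAG;

	return mlx5_add_flow_rules(ft, spec, &flow_act, dst, 1);
}
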
index 340290b..b1d5f43 100644
@@ -2666,11 +2666,15 @@ int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
        }
 }
 
-static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
-                          u32 *match_v, const union ib_flow_spec *ib_spec,
+static int parse_flow_attr(struct mlx5_core_dev *mdev,
+                          struct mlx5_flow_spec *spec,
+                          const union ib_flow_spec *ib_spec,
                           const struct ib_flow_attr *flow_attr,
                           struct mlx5_flow_act *action, u32 prev_type)
 {
+       struct mlx5_flow_context *flow_context = &spec->flow_context;
+       u32 *match_c = spec->match_criteria;
+       u32 *match_v = spec->match_value;
        void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
                                           misc_parameters);
        void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
@@ -2989,8 +2993,8 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
                if (ib_spec->flow_tag.tag_id >= BIT(24))
                        return -EINVAL;
 
-               action->flow_tag = ib_spec->flow_tag.tag_id;
-               action->flags |= FLOW_ACT_HAS_TAG;
+               flow_context->flow_tag = ib_spec->flow_tag.tag_id;
+               flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
                break;
        case IB_FLOW_SPEC_ACTION_DROP:
                if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
@@ -3084,7 +3088,8 @@ is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev,
                return VALID_SPEC_NA;
 
        return is_crypto && is_ipsec &&
-               (!egress || (!is_drop && !(flow_act->flags & FLOW_ACT_HAS_TAG))) ?
+               (!egress || (!is_drop &&
+                            !(spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG))) ?
                VALID_SPEC_VALID : VALID_SPEC_INVALID;
 }
 
@@ -3464,6 +3469,37 @@ free:
        return ret;
 }
 
+static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev,
+                                        struct mlx5_flow_spec *spec,
+                                        struct mlx5_eswitch_rep *rep)
+{
+       struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+       void *misc;
+
+       if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+               misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+                                   misc_parameters_2);
+
+               MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+                        mlx5_eswitch_get_vport_metadata_for_match(esw,
+                                                                  rep->vport));
+               misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+                                   misc_parameters_2);
+
+               MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0);
+       } else {
+               misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+                                   misc_parameters);
+
+               MLX5_SET(fte_match_set_misc, misc, source_port, rep->vport);
+
+               misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+                                   misc_parameters);
+
+               MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+       }
+}
+
 static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
                                                      struct mlx5_ib_flow_prio *ft_prio,
                                                      const struct ib_flow_attr *flow_attr,
@@ -3473,7 +3509,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
 {
        struct mlx5_flow_table  *ft = ft_prio->flow_table;
        struct mlx5_ib_flow_handler *handler;
-       struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
+       struct mlx5_flow_act flow_act = {};
        struct mlx5_flow_spec *spec;
        struct mlx5_flow_destination dest_arr[2] = {};
        struct mlx5_flow_destination *rule_dst = dest_arr;
@@ -3504,8 +3540,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
        }
 
        for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
-               err = parse_flow_attr(dev->mdev, spec->match_criteria,
-                                     spec->match_value,
+               err = parse_flow_attr(dev->mdev, spec,
                                      ib_flow, flow_attr, &flow_act,
                                      prev_type);
                if (err < 0)
@@ -3519,19 +3554,15 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
                set_underlay_qp(dev, spec, underlay_qpn);
 
        if (dev->is_rep) {
-               void *misc;
+               struct mlx5_eswitch_rep *rep;
 
-               if (!dev->port[flow_attr->port - 1].rep) {
+               rep = dev->port[flow_attr->port - 1].rep;
+               if (!rep) {
                        err = -EINVAL;
                        goto free;
                }
-               misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
-                                   misc_parameters);
-               MLX5_SET(fte_match_set_misc, misc, source_port,
-                        dev->port[flow_attr->port - 1].rep->vport);
-               misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
-                                   misc_parameters);
-               MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+
+               mlx5_ib_set_rule_source_port(dev, spec, rep);
        }
 
        spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
@@ -3572,11 +3603,11 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
                                        MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
        }
 
-       if ((flow_act.flags & FLOW_ACT_HAS_TAG)  &&
+       if ((spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG)  &&
            (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
             flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
                mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
-                            flow_act.flow_tag, flow_attr->type);
+                            spec->flow_context.flow_tag, flow_attr->type);
                err = -EINVAL;
                goto free;
        }
@@ -3947,6 +3978,7 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev,
                      struct mlx5_ib_flow_prio *ft_prio,
                      struct mlx5_flow_destination *dst,
                      struct mlx5_ib_flow_matcher  *fs_matcher,
+                     struct mlx5_flow_context *flow_context,
                      struct mlx5_flow_act *flow_act,
                      void *cmd_in, int inlen,
                      int dst_num)
@@ -3969,6 +4001,7 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev,
        memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params,
               fs_matcher->mask_len);
        spec->match_criteria_enable = fs_matcher->match_criteria_enable;
+       spec->flow_context = *flow_context;
 
        handler->rule = mlx5_add_flow_rules(ft, spec,
                                            flow_act, dst, dst_num);
@@ -4033,6 +4066,7 @@ static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher,
 struct mlx5_ib_flow_handler *
 mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
                        struct mlx5_ib_flow_matcher *fs_matcher,
+                       struct mlx5_flow_context *flow_context,
                        struct mlx5_flow_act *flow_act,
                        u32 counter_id,
                        void *cmd_in, int inlen, int dest_id,
@@ -4085,7 +4119,8 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
                dst_num++;
        }
 
-       handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, flow_act,
+       handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher,
+                                       flow_context, flow_act,
                                        cmd_in, inlen, dst_num);
 
        if (IS_ERR(handler)) {
index 40eb8be..1c205c2 100644
@@ -920,6 +920,7 @@ struct mlx5_ib_lb_state {
 };
 
 struct mlx5_ib_pf_eq {
+       struct notifier_block irq_nb;
        struct mlx5_ib_dev *dev;
        struct mlx5_eq *core;
        struct work_struct work;
@@ -1316,6 +1317,7 @@ extern const struct uapi_definition mlx5_ib_devx_defs[];
 extern const struct uapi_definition mlx5_ib_flow_defs[];
 struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add(
        struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
+       struct mlx5_flow_context *flow_context,
        struct mlx5_flow_act *flow_act, u32 counter_id,
        void *cmd_in, int inlen, int dest_id, int dest_type);
 bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type);
index 5f09699..83b452d 100644
@@ -130,7 +130,7 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context)
        struct mlx5_cache_ent *ent = &cache->ent[c];
        u8 key;
        unsigned long flags;
-       struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table;
+       struct xarray *mkeys = &dev->mdev->priv.mkey_table;
        int err;
 
        spin_lock_irqsave(&ent->lock, flags);
@@ -158,12 +158,12 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context)
        ent->size++;
        spin_unlock_irqrestore(&ent->lock, flags);
 
-       write_lock_irqsave(&table->lock, flags);
-       err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmkey.key),
-                               &mr->mmkey);
+       xa_lock_irqsave(mkeys, flags);
+       err = xa_err(__xa_store(mkeys, mlx5_base_mkey(mr->mmkey.key),
+                               &mr->mmkey, GFP_ATOMIC));
+       xa_unlock_irqrestore(mkeys, flags);
        if (err)
                pr_err("Error inserting to mkey tree. 0x%x\n", -err);
-       write_unlock_irqrestore(&table->lock, flags);
 
        if (!completion_done(&ent->compl))
                complete(&ent->compl);
index 91507a2..c594489 100644
@@ -768,7 +768,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
        bcnt -= *bytes_committed;
 
 next_mr:
-       mmkey = __mlx5_mr_lookup(dev->mdev, mlx5_base_mkey(key));
+       mmkey = xa_load(&dev->mdev->priv.mkey_table, mlx5_base_mkey(key));
        if (!mkey_is_eq(mmkey, key)) {
                mlx5_ib_dbg(dev, "failed to find mkey %x\n", key);
                ret = -EFAULT;
@@ -1488,9 +1488,11 @@ static void mlx5_ib_eq_pf_process(struct mlx5_ib_pf_eq *eq)
        mlx5_eq_update_ci(eq->core, cc, 1);
 }
 
-static irqreturn_t mlx5_ib_eq_pf_int(int irq, void *eq_ptr)
+static int mlx5_ib_eq_pf_int(struct notifier_block *nb, unsigned long type,
+                            void *data)
 {
-       struct mlx5_ib_pf_eq *eq = eq_ptr;
+       struct mlx5_ib_pf_eq *eq =
+               container_of(nb, struct mlx5_ib_pf_eq, irq_nb);
        unsigned long flags;
 
        if (spin_trylock_irqsave(&eq->lock, flags)) {
@@ -1553,20 +1555,26 @@ mlx5_ib_create_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
                goto err_mempool;
        }
 
+       eq->irq_nb.notifier_call = mlx5_ib_eq_pf_int;
        param = (struct mlx5_eq_param) {
-               .index = MLX5_EQ_PFAULT_IDX,
+               .irq_index = 0,
                .mask = 1 << MLX5_EVENT_TYPE_PAGE_FAULT,
                .nent = MLX5_IB_NUM_PF_EQE,
-               .context = eq,
-               .handler = mlx5_ib_eq_pf_int
        };
-       eq->core = mlx5_eq_create_generic(dev->mdev, "mlx5_ib_page_fault_eq", &param);
+       eq->core = mlx5_eq_create_generic(dev->mdev, &param);
        if (IS_ERR(eq->core)) {
                err = PTR_ERR(eq->core);
                goto err_wq;
        }
+       err = mlx5_eq_enable(dev->mdev, eq->core, &eq->irq_nb);
+       if (err) {
+               mlx5_ib_err(dev, "failed to enable odp EQ %d\n", err);
+               goto err_eq;
+       }
 
        return 0;
+err_eq:
+       mlx5_eq_destroy_generic(dev->mdev, eq->core);
 err_wq:
        destroy_workqueue(eq->wq);
 err_mempool:
@@ -1579,6 +1587,7 @@ mlx5_ib_destroy_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
 {
        int err;
 
+       mlx5_eq_disable(dev->mdev, eq->core, &eq->irq_nb);
        err = mlx5_eq_destroy_generic(dev->mdev, eq->core);
        cancel_work_sync(&eq->work);
        destroy_workqueue(eq->wq);
@@ -1677,8 +1686,8 @@ static void num_pending_prefetch_dec(struct mlx5_ib_dev *dev,
                struct mlx5_core_mkey *mmkey;
                struct mlx5_ib_mr *mr;
 
-               mmkey = __mlx5_mr_lookup(dev->mdev,
-                                        mlx5_base_mkey(sg_list[i].lkey));
+               mmkey = xa_load(&dev->mdev->priv.mkey_table,
+                               mlx5_base_mkey(sg_list[i].lkey));
                mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
                atomic_dec(&mr->num_pending_prefetch);
        }
@@ -1697,8 +1706,8 @@ static bool num_pending_prefetch_inc(struct ib_pd *pd,
                struct mlx5_core_mkey *mmkey;
                struct mlx5_ib_mr *mr;
 
-               mmkey = __mlx5_mr_lookup(dev->mdev,
-                                        mlx5_base_mkey(sg_list[i].lkey));
+               mmkey = xa_load(&dev->mdev->priv.mkey_table,
+                               mlx5_base_mkey(sg_list[i].lkey));
                if (!mmkey || mmkey->key != sg_list[i].lkey) {
                        ret = false;
                        break;
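
The odp.c hunk above shows the consumer side of item 3: EQ users no longer
pass an irqreturn_t handler and context through mlx5_eq_param; they embed a
struct notifier_block, register it with mlx5_eq_enable(), and recover their
state with container_of(). A condensed sketch of the pattern, using a
hypothetical demo_eq wrapper:

#include <linux/notifier.h>

struct demo_eq {
	struct notifier_block irq_nb;	/* embedded, so container_of() works */
	struct mlx5_eq *core;
};

/* was: static irqreturn_t demo_eq_int(int irq, void *eq_ptr) */
static int demo_eq_int(struct notifier_block *nb, unsigned long type,
		       void *data)
{
	struct demo_eq *eq = container_of(nb, struct demo_eq, irq_nb);

	/* poll eq->core up to the budget, then re-arm via mlx5_eq_update_ci() */
	return 0;	/* NOTIFY_DONE */
}
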
index 5fe2bf9..6aa0fe6 100644
@@ -13,7 +13,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o
 #
 mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
                health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \
-               transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
+               transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
                fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
                lib/devcom.o lib/pci_vsc.o diag/fs_tracepoint.o \
                diag/fw_tracer.o diag/crdump.o devlink.o
index a4cf123..ddf1b87 100644
@@ -187,6 +187,7 @@ TRACE_EVENT(mlx5_fs_set_fte,
                __field(u32, index)
                __field(u32, action)
                __field(u32, flow_tag)
+               __field(u32, flow_source)
                __field(u8,  mask_enable)
                __field(int, new_fte)
                __array(u32, mask_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4))
@@ -204,7 +205,8 @@ TRACE_EVENT(mlx5_fs_set_fte,
                           __entry->index = fte->index;
                           __entry->action = fte->action.action;
                           __entry->mask_enable = __entry->fg->mask.match_criteria_enable;
-                          __entry->flow_tag = fte->action.flow_tag;
+                          __entry->flow_tag = fte->flow_context.flow_tag;
+                          __entry->flow_source = fte->flow_context.flow_source;
                           memcpy(__entry->mask_outer,
                                  MLX5_ADDR_OF(fte_match_param,
                                               &__entry->fg->mask.match_criteria,
index 4421c10..8396626 100644
@@ -426,7 +426,7 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv,
        }
 
        spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria));
-       flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
+       spec->flow_context.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
        rule = mlx5_add_flow_rules(ft, spec, &flow_act, dst, dst ? 1 : 0);
        if (IS_ERR(rule)) {
                err = PTR_ERR(rule);
index eb8433c..1453da6 100644
@@ -717,19 +717,22 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
                      struct mlx5e_tc_flow *flow,
                      struct netlink_ext_ack *extack)
 {
+       struct mlx5_flow_context *flow_context = &parse_attr->spec.flow_context;
        struct mlx5_nic_flow_attr *attr = flow->nic_attr;
        struct mlx5_core_dev *dev = priv->mdev;
        struct mlx5_flow_destination dest[2] = {};
        struct mlx5_flow_act flow_act = {
                .action = attr->action,
-               .flow_tag = attr->flow_tag,
                .reformat_id = 0,
-               .flags    = FLOW_ACT_HAS_TAG | FLOW_ACT_NO_APPEND,
+               .flags    = FLOW_ACT_NO_APPEND,
        };
        struct mlx5_fc *counter = NULL;
        bool table_created = false;
        int err, dest_ix = 0;
 
+       flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
+       flow_context->flow_tag = attr->flow_tag;
+
        if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) {
                err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
                if (err) {
index 5e9319d..58fff2f 100644
@@ -61,17 +61,21 @@ enum {
        MLX5_EQ_DOORBEL_OFFSET  = 0x40,
 };
 
-struct mlx5_irq_info {
-       cpumask_var_t mask;
-       char name[MLX5_MAX_IRQ_NAME];
-       void *context; /* dev_id provided to request_irq */
+/* The polling budget must not exceed MLX5_NUM_SPARE_EQE, so that the
+ * consumer index is updated before all the entries in the EQ have been
+ * polled. The MLX5_NUM_SPARE_EQE extra entries also factor into the EQ
+ * size, which keeps the budget smaller than the EQ itself.
+ */
+enum {
+       MLX5_EQ_POLLING_BUDGET  = 128,
 };
 
+static_assert(MLX5_EQ_POLLING_BUDGET <= MLX5_NUM_SPARE_EQE);
+
 struct mlx5_eq_table {
        struct list_head        comp_eqs_list;
-       struct mlx5_eq          pages_eq;
-       struct mlx5_eq          cmd_eq;
-       struct mlx5_eq          async_eq;
+       struct mlx5_eq_async    pages_eq;
+       struct mlx5_eq_async    cmd_eq;
+       struct mlx5_eq_async    async_eq;
 
        struct atomic_notifier_head nh[MLX5_EVENT_TYPE_MAX];
 
@@ -79,11 +83,8 @@ struct mlx5_eq_table {
        struct mlx5_nb          cq_err_nb;
 
        struct mutex            lock; /* sync async eqs creations */
-       int                     num_comp_vectors;
-       struct mlx5_irq_info    *irq_info;
-#ifdef CONFIG_RFS_ACCEL
-       struct cpu_rmap         *rmap;
-#endif
+       int                     num_comp_eqs;
+       struct mlx5_irq_table   *irq_table;
 };
 
 #define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG)          | \
@@ -124,16 +125,24 @@ static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn)
        return cq;
 }
 
-static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr)
+static int mlx5_eq_comp_int(struct notifier_block *nb,
+                           __always_unused unsigned long action,
+                           __always_unused void *data)
 {
-       struct mlx5_eq_comp *eq_comp = eq_ptr;
-       struct mlx5_eq *eq = eq_ptr;
+       struct mlx5_eq_comp *eq_comp =
+               container_of(nb, struct mlx5_eq_comp, irq_nb);
+       struct mlx5_eq *eq = &eq_comp->core;
        struct mlx5_eqe *eqe;
-       int set_ci = 0;
+       int num_eqes = 0;
        u32 cqn = -1;
 
-       while ((eqe = next_eqe_sw(eq))) {
+       eqe = next_eqe_sw(eq);
+       if (!eqe)
+               goto out;
+
+       do {
                struct mlx5_core_cq *cq;
+
                /* Make sure we read EQ entry contents after we've
                 * checked the ownership bit.
                 */
@@ -151,26 +160,16 @@ static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr)
                }
 
                ++eq->cons_index;
-               ++set_ci;
 
-               /* The HCA will think the queue has overflowed if we
-                * don't tell it we've been processing events.  We
-                * create our EQs with MLX5_NUM_SPARE_EQE extra
-                * entries, so we must update our consumer index at
-                * least that often.
-                */
-               if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) {
-                       eq_update_ci(eq, 0);
-                       set_ci = 0;
-               }
-       }
+       } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq)));
 
+out:
        eq_update_ci(eq, 1);
 
        if (cqn != -1)
                tasklet_schedule(&eq_comp->tasklet_ctx.task);
 
-       return IRQ_HANDLED;
+       return 0;
 }
 
 /* Some architectures don't latch interrupts when they are disabled, so using
@@ -184,25 +183,32 @@ u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq)
 
        disable_irq(eq->core.irqn);
        count_eqe = eq->core.cons_index;
-       mlx5_eq_comp_int(eq->core.irqn, eq);
+       mlx5_eq_comp_int(&eq->irq_nb, 0, NULL);
        count_eqe = eq->core.cons_index - count_eqe;
        enable_irq(eq->core.irqn);
 
        return count_eqe;
 }
 
-static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr)
+static int mlx5_eq_async_int(struct notifier_block *nb,
+                            unsigned long action, void *data)
 {
-       struct mlx5_eq *eq = eq_ptr;
+       struct mlx5_eq_async *eq_async =
+               container_of(nb, struct mlx5_eq_async, irq_nb);
+       struct mlx5_eq *eq = &eq_async->core;
        struct mlx5_eq_table *eqt;
        struct mlx5_core_dev *dev;
        struct mlx5_eqe *eqe;
-       int set_ci = 0;
+       int num_eqes = 0;
 
        dev = eq->dev;
        eqt = dev->priv.eq_table;
 
-       while ((eqe = next_eqe_sw(eq))) {
+       eqe = next_eqe_sw(eq);
+       if (!eqe)
+               goto out;
+
+       do {
                /*
                 * Make sure we read EQ entry contents after we've
                 * checked the ownership bit.
@@ -217,23 +223,13 @@ static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr)
                atomic_notifier_call_chain(&eqt->nh[MLX5_EVENT_TYPE_NOTIFY_ANY], eqe->type, eqe);
 
                ++eq->cons_index;
-               ++set_ci;
 
-               /* The HCA will think the queue has overflowed if we
-                * don't tell it we've been processing events.  We
-                * create our EQs with MLX5_NUM_SPARE_EQE extra
-                * entries, so we must update our consumer index at
-                * least that often.
-                */
-               if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) {
-                       eq_update_ci(eq, 0);
-                       set_ci = 0;
-               }
-       }
+       } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq)));
 
+out:
        eq_update_ci(eq, 1);
 
-       return IRQ_HANDLED;
+       return 0;
 }
 
 static void init_eq_buf(struct mlx5_eq *eq)
@@ -248,23 +244,19 @@ static void init_eq_buf(struct mlx5_eq *eq)
 }
 
 static int
-create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name,
+create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
              struct mlx5_eq_param *param)
 {
-       struct mlx5_eq_table *eq_table = dev->priv.eq_table;
        struct mlx5_cq_table *cq_table = &eq->cq_table;
        u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
        struct mlx5_priv *priv = &dev->priv;
-       u8 vecidx = param->index;
+       u8 vecidx = param->irq_index;
        __be64 *pas;
        void *eqc;
        int inlen;
        u32 *in;
        int err;
 
-       if (eq_table->irq_info[vecidx].context)
-               return -EEXIST;
-
        /* Init CQ table */
        memset(cq_table, 0, sizeof(*cq_table));
        spin_lock_init(&cq_table->lock);
@@ -307,34 +299,19 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name,
        if (err)
                goto err_in;
 
-       snprintf(eq_table->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s",
-                name, pci_name(dev->pdev));
-       eq_table->irq_info[vecidx].context = param->context;
-
        eq->vecidx = vecidx;
        eq->eqn = MLX5_GET(create_eq_out, out, eq_number);
        eq->irqn = pci_irq_vector(dev->pdev, vecidx);
        eq->dev = dev;
        eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET;
-       err = request_irq(eq->irqn, param->handler, 0,
-                         eq_table->irq_info[vecidx].name, param->context);
-       if (err)
-               goto err_eq;
 
        err = mlx5_debug_eq_add(dev, eq);
        if (err)
-               goto err_irq;
-
-       /* EQs are created in ARMED state
-        */
-       eq_update_ci(eq, 1);
+               goto err_eq;
 
        kvfree(in);
        return 0;
 
-err_irq:
-       free_irq(eq->irqn, eq);
-
 err_eq:
        mlx5_cmd_destroy_eq(dev, eq->eqn);
 
@@ -346,18 +323,48 @@ err_buf:
        return err;
 }
 
-static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+/**
+ * mlx5_eq_enable - Enable EQ for receiving EQEs
+ * @dev: Device which owns the eq
+ * @eq: EQ to enable
+ * @nb: Notifier call block
+ *
+ * Must be called after EQ is created in device.
+ */
+int mlx5_eq_enable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+                  struct notifier_block *nb)
 {
        struct mlx5_eq_table *eq_table = dev->priv.eq_table;
-       struct mlx5_irq_info *irq_info;
        int err;
 
-       irq_info = &eq_table->irq_info[eq->vecidx];
+       err = mlx5_irq_attach_nb(eq_table->irq_table, eq->vecidx, nb);
+       if (!err)
+               eq_update_ci(eq, 1);
 
-       mlx5_debug_eq_remove(dev, eq);
+       return err;
+}
+EXPORT_SYMBOL(mlx5_eq_enable);
 
-       free_irq(eq->irqn, irq_info->context);
-       irq_info->context = NULL;
+/**
+ * mlx5_eq_disable - Disable EQ for receiving EQEs
+ * @dev: Device which owns the eq
+ * @eq: EQ to disable
+ * @nb: Notifier call block
+ *
+ * Must be called before EQ is destroyed.
+ */
+void mlx5_eq_disable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+                    struct notifier_block *nb)
+{
+       struct mlx5_eq_table *eq_table = dev->priv.eq_table;
+
+       mlx5_irq_detach_nb(eq_table->irq_table, eq->vecidx, nb);
+}
+EXPORT_SYMBOL(mlx5_eq_disable);
+
+static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+{
+       int err;
+
+       mlx5_debug_eq_remove(dev, eq);
 
        err = mlx5_cmd_destroy_eq(dev, eq->eqn);
        if (err)
@@ -423,6 +430,7 @@ int mlx5_eq_table_init(struct mlx5_core_dev *dev)
        for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++)
                ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]);
 
+       eq_table->irq_table = dev->priv.irq_table;
        return 0;
 
 kvfree_eq_table:
@@ -439,19 +447,20 @@ void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev)
 
 /* Async EQs */
 
-static int create_async_eq(struct mlx5_core_dev *dev, const char *name,
+static int create_async_eq(struct mlx5_core_dev *dev,
                           struct mlx5_eq *eq, struct mlx5_eq_param *param)
 {
        struct mlx5_eq_table *eq_table = dev->priv.eq_table;
        int err;
 
        mutex_lock(&eq_table->lock);
-       if (param->index >= MLX5_EQ_MAX_ASYNC_EQS) {
-               err = -ENOSPC;
+       /* Async EQs must share irq index 0 */
+       if (param->irq_index != 0) {
+               err = -EINVAL;
                goto unlock;
        }
 
-       err = create_map_eq(dev, eq, name, param);
+       err = create_map_eq(dev, eq, param);
 unlock:
        mutex_unlock(&eq_table->lock);
        return err;
@@ -480,7 +489,7 @@ static int cq_err_event_notifier(struct notifier_block *nb,
        /* type == MLX5_EVENT_TYPE_CQ_ERROR */
 
        eqt = mlx5_nb_cof(nb, struct mlx5_eq_table, cq_err_nb);
-       eq  = &eqt->async_eq;
+       eq  = &eqt->async_eq.core;
        eqe = data;
 
        cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
@@ -549,55 +558,73 @@ static int create_async_eqs(struct mlx5_core_dev *dev)
        MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR);
        mlx5_eq_notifier_register(dev, &table->cq_err_nb);
 
+       table->cmd_eq.irq_nb.notifier_call = mlx5_eq_async_int;
        param = (struct mlx5_eq_param) {
-               .index = MLX5_EQ_CMD_IDX,
+               .irq_index = 0,
                .mask = 1ull << MLX5_EVENT_TYPE_CMD,
                .nent = MLX5_NUM_CMD_EQE,
-               .context = &table->cmd_eq,
-               .handler = mlx5_eq_async_int,
        };
-       err = create_async_eq(dev, "mlx5_cmd_eq", &table->cmd_eq, &param);
+       err = create_async_eq(dev, &table->cmd_eq.core, &param);
        if (err) {
                mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err);
                goto err0;
        }
-
+       err = mlx5_eq_enable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb);
+       if (err) {
+               mlx5_core_warn(dev, "failed to enable cmd EQ %d\n", err);
+               goto err1;
+       }
        mlx5_cmd_use_events(dev);
 
+       table->async_eq.irq_nb.notifier_call = mlx5_eq_async_int;
        param = (struct mlx5_eq_param) {
-               .index = MLX5_EQ_ASYNC_IDX,
+               .irq_index = 0,
                .mask = gather_async_events_mask(dev),
                .nent = MLX5_NUM_ASYNC_EQE,
-               .context = &table->async_eq,
-               .handler = mlx5_eq_async_int,
        };
-       err = create_async_eq(dev, "mlx5_async_eq", &table->async_eq, &param);
+       err = create_async_eq(dev, &table->async_eq.core, &param);
        if (err) {
                mlx5_core_warn(dev, "failed to create async EQ %d\n", err);
-               goto err1;
+               goto err2;
+       }
+       err = mlx5_eq_enable(dev, &table->async_eq.core,
+                            &table->async_eq.irq_nb);
+       if (err) {
+               mlx5_core_warn(dev, "failed to enable async EQ %d\n", err);
+               goto err3;
        }
 
+       table->pages_eq.irq_nb.notifier_call = mlx5_eq_async_int;
        param = (struct mlx5_eq_param) {
-               .index = MLX5_EQ_PAGEREQ_IDX,
+               .irq_index = 0,
                .mask =  1 << MLX5_EVENT_TYPE_PAGE_REQUEST,
                .nent = /* TODO: sriov max_vf + */ 1,
-               .context = &table->pages_eq,
-               .handler = mlx5_eq_async_int,
        };
-       err = create_async_eq(dev, "mlx5_pages_eq", &table->pages_eq, &param);
+       err = create_async_eq(dev, &table->pages_eq.core, &param);
        if (err) {
                mlx5_core_warn(dev, "failed to create pages EQ %d\n", err);
-               goto err2;
+               goto err4;
+       }
+       err = mlx5_eq_enable(dev, &table->pages_eq.core,
+                            &table->pages_eq.irq_nb);
+       if (err) {
+               mlx5_core_warn(dev, "failed to enable pages EQ %d\n", err);
+               goto err5;
        }
 
        return err;
 
+err5:
+       destroy_async_eq(dev, &table->pages_eq.core);
+err4:
+       mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb);
+err3:
+       destroy_async_eq(dev, &table->async_eq.core);
 err2:
-       destroy_async_eq(dev, &table->async_eq);
-
-err1:
        mlx5_cmd_use_polling(dev);
-       destroy_async_eq(dev, &table->cmd_eq);
+       mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb);
+err1:
+       destroy_async_eq(dev, &table->cmd_eq.core);
 err0:
        mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
        return err;
@@ -608,19 +635,22 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev)
        struct mlx5_eq_table *table = dev->priv.eq_table;
        int err;
 
-       err = destroy_async_eq(dev, &table->pages_eq);
+       mlx5_eq_disable(dev, &table->pages_eq.core, &table->pages_eq.irq_nb);
+       err = destroy_async_eq(dev, &table->pages_eq.core);
        if (err)
                mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n",
                              err);
 
-       err = destroy_async_eq(dev, &table->async_eq);
+       mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb);
+       err = destroy_async_eq(dev, &table->async_eq.core);
        if (err)
                mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n",
                              err);
 
        mlx5_cmd_use_polling(dev);
 
-       err = destroy_async_eq(dev, &table->cmd_eq);
+       mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb);
+       err = destroy_async_eq(dev, &table->cmd_eq.core);
        if (err)
                mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n",
                              err);
@@ -630,24 +660,24 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev)
 
 struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev)
 {
-       return &dev->priv.eq_table->async_eq;
+       return &dev->priv.eq_table->async_eq.core;
 }
 
 void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev)
 {
-       synchronize_irq(dev->priv.eq_table->async_eq.irqn);
+       synchronize_irq(dev->priv.eq_table->async_eq.core.irqn);
 }
 
 void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev)
 {
-       synchronize_irq(dev->priv.eq_table->cmd_eq.irqn);
+       synchronize_irq(dev->priv.eq_table->cmd_eq.core.irqn);
 }
 
 /* Generic EQ API for mlx5_core consumers
  * Needed For RDMA ODP EQ for now
  */
 struct mlx5_eq *
-mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name,
+mlx5_eq_create_generic(struct mlx5_core_dev *dev,
                       struct mlx5_eq_param *param)
 {
        struct mlx5_eq *eq = kvzalloc(sizeof(*eq), GFP_KERNEL);
@@ -656,7 +686,7 @@ mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name,
        if (!eq)
                return ERR_PTR(-ENOMEM);
 
-       err = create_async_eq(dev, name, eq, param);
+       err = create_async_eq(dev, eq, param);
        if (err) {
                kvfree(eq);
                eq = ERR_PTR(err);
@@ -714,84 +744,14 @@ void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm)
 }
 EXPORT_SYMBOL(mlx5_eq_update_ci);
 
-/* Completion EQs */
-
-static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
-{
-       struct mlx5_priv *priv  = &mdev->priv;
-       int vecidx = MLX5_EQ_VEC_COMP_BASE + i;
-       int irq = pci_irq_vector(mdev->pdev, vecidx);
-       struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx];
-
-       if (!zalloc_cpumask_var(&irq_info->mask, GFP_KERNEL)) {
-               mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
-               return -ENOMEM;
-       }
-
-       cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
-                       irq_info->mask);
-
-       if (IS_ENABLED(CONFIG_SMP) &&
-           irq_set_affinity_hint(irq, irq_info->mask))
-               mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);
-
-       return 0;
-}
-
-static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
-{
-       int vecidx = MLX5_EQ_VEC_COMP_BASE + i;
-       struct mlx5_priv *priv  = &mdev->priv;
-       int irq = pci_irq_vector(mdev->pdev, vecidx);
-       struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx];
-
-       irq_set_affinity_hint(irq, NULL);
-       free_cpumask_var(irq_info->mask);
-}
-
-static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev)
-{
-       int err;
-       int i;
-
-       for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++) {
-               err = set_comp_irq_affinity_hint(mdev, i);
-               if (err)
-                       goto err_out;
-       }
-
-       return 0;
-
-err_out:
-       for (i--; i >= 0; i--)
-               clear_comp_irq_affinity_hint(mdev, i);
-
-       return err;
-}
-
-static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev)
-{
-       int i;
-
-       for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++)
-               clear_comp_irq_affinity_hint(mdev, i);
-}
-
 static void destroy_comp_eqs(struct mlx5_core_dev *dev)
 {
        struct mlx5_eq_table *table = dev->priv.eq_table;
        struct mlx5_eq_comp *eq, *n;
 
-       clear_comp_irqs_affinity_hints(dev);
-
-#ifdef CONFIG_RFS_ACCEL
-       if (table->rmap) {
-               free_irq_cpu_rmap(table->rmap);
-               table->rmap = NULL;
-       }
-#endif
        list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
                list_del(&eq->list);
+               mlx5_eq_disable(dev, &eq->core, &eq->irq_nb);
                if (destroy_unmap_eq(dev, &eq->core))
                        mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n",
                                       eq->core.eqn);
@@ -803,23 +763,17 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev)
 static int create_comp_eqs(struct mlx5_core_dev *dev)
 {
        struct mlx5_eq_table *table = dev->priv.eq_table;
-       char name[MLX5_MAX_IRQ_NAME];
        struct mlx5_eq_comp *eq;
-       int ncomp_vec;
+       int ncomp_eqs;
        int nent;
        int err;
        int i;
 
        INIT_LIST_HEAD(&table->comp_eqs_list);
-       ncomp_vec = table->num_comp_vectors;
+       ncomp_eqs = table->num_comp_eqs;
        nent = MLX5_COMP_EQ_SIZE;
-#ifdef CONFIG_RFS_ACCEL
-       table->rmap = alloc_irq_cpu_rmap(ncomp_vec);
-       if (!table->rmap)
-               return -ENOMEM;
-#endif
-       for (i = 0; i < ncomp_vec; i++) {
-               int vecidx = i + MLX5_EQ_VEC_COMP_BASE;
+       for (i = 0; i < ncomp_eqs; i++) {
+               int vecidx = i + MLX5_IRQ_VEC_COMP_BASE;
                struct mlx5_eq_param param = {};
 
                eq = kzalloc(sizeof(*eq), GFP_KERNEL);
@@ -834,33 +788,29 @@ static int create_comp_eqs(struct mlx5_core_dev *dev)
                tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb,
                             (unsigned long)&eq->tasklet_ctx);
 
-#ifdef CONFIG_RFS_ACCEL
-               irq_cpu_rmap_add(table->rmap, pci_irq_vector(dev->pdev, vecidx));
-#endif
-               snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i);
+               eq->irq_nb.notifier_call = mlx5_eq_comp_int;
                param = (struct mlx5_eq_param) {
-                       .index = vecidx,
+                       .irq_index = vecidx,
                        .mask = 0,
                        .nent = nent,
-                       .context = &eq->core,
-                       .handler = mlx5_eq_comp_int
                };
-               err = create_map_eq(dev, &eq->core, name, &param);
+               err = create_map_eq(dev, &eq->core, &param);
+               if (err) {
+                       kfree(eq);
+                       goto clean;
+               }
+               err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb);
                if (err) {
+                       destroy_unmap_eq(dev, &eq->core);
                        kfree(eq);
                        goto clean;
                }
+
                mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn);
                /* add tail, to keep the list ordered, for mlx5_vector2eqn to work */
                list_add_tail(&eq->list, &table->comp_eqs_list);
        }
 
-       err = set_comp_irq_affinity_hints(dev);
-       if (err) {
-               mlx5_core_err(dev, "Failed to alloc affinity hint cpumask\n");
-               goto clean;
-       }
-
        return 0;
 
 clean:
@@ -891,22 +841,24 @@ EXPORT_SYMBOL(mlx5_vector2eqn);
 
 unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev)
 {
-       return dev->priv.eq_table->num_comp_vectors;
+       return dev->priv.eq_table->num_comp_eqs;
 }
 EXPORT_SYMBOL(mlx5_comp_vectors_count);
 
 struct cpumask *
 mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector)
 {
-       /* TODO: consider irq_get_affinity_mask(irq) */
-       return dev->priv.eq_table->irq_info[vector + MLX5_EQ_VEC_COMP_BASE].mask;
+       int vecidx = vector + MLX5_IRQ_VEC_COMP_BASE;
+
+       return mlx5_irq_get_affinity_mask(dev->priv.eq_table->irq_table,
+                                         vecidx);
 }
 EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask);
 
 #ifdef CONFIG_RFS_ACCEL
 struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev)
 {
-       return dev->priv.eq_table->rmap;
+       return mlx5_irq_get_rmap(dev->priv.eq_table->irq_table);
 }
 #endif
 
@@ -927,82 +879,19 @@ struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn)
 void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
 {
        struct mlx5_eq_table *table = dev->priv.eq_table;
-       int i, max_eqs;
-
-       clear_comp_irqs_affinity_hints(dev);
-
-#ifdef CONFIG_RFS_ACCEL
-       if (table->rmap) {
-               free_irq_cpu_rmap(table->rmap);
-               table->rmap = NULL;
-       }
-#endif
 
        mutex_lock(&table->lock); /* sync with create/destroy_async_eq */
-       max_eqs = table->num_comp_vectors + MLX5_EQ_VEC_COMP_BASE;
-       for (i = max_eqs - 1; i >= 0; i--) {
-               if (!table->irq_info[i].context)
-                       continue;
-               free_irq(pci_irq_vector(dev->pdev, i), table->irq_info[i].context);
-               table->irq_info[i].context = NULL;
-       }
+       mlx5_irq_table_destroy(dev);
        mutex_unlock(&table->lock);
-       pci_free_irq_vectors(dev->pdev);
-}
-
-static int alloc_irq_vectors(struct mlx5_core_dev *dev)
-{
-       struct mlx5_priv *priv = &dev->priv;
-       struct mlx5_eq_table *table = priv->eq_table;
-       int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
-                     MLX5_CAP_GEN(dev, max_num_eqs) :
-                     1 << MLX5_CAP_GEN(dev, log_max_eq);
-       int nvec;
-       int err;
-
-       nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
-              MLX5_EQ_VEC_COMP_BASE;
-       nvec = min_t(int, nvec, num_eqs);
-       if (nvec <= MLX5_EQ_VEC_COMP_BASE)
-               return -ENOMEM;
-
-       table->irq_info = kcalloc(nvec, sizeof(*table->irq_info), GFP_KERNEL);
-       if (!table->irq_info)
-               return -ENOMEM;
-
-       nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_EQ_VEC_COMP_BASE + 1,
-                                    nvec, PCI_IRQ_MSIX);
-       if (nvec < 0) {
-               err = nvec;
-               goto err_free_irq_info;
-       }
-
-       table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE;
-
-       return 0;
-
-err_free_irq_info:
-       kfree(table->irq_info);
-       return err;
-}
-
-static void free_irq_vectors(struct mlx5_core_dev *dev)
-{
-       struct mlx5_priv *priv = &dev->priv;
-
-       pci_free_irq_vectors(dev->pdev);
-       kfree(priv->eq_table->irq_info);
 }
 
 int mlx5_eq_table_create(struct mlx5_core_dev *dev)
 {
+       struct mlx5_eq_table *eq_table = dev->priv.eq_table;
        int err;
 
-       err = alloc_irq_vectors(dev);
-       if (err) {
-               mlx5_core_err(dev, "alloc irq vectors failed\n");
-               return err;
-       }
+       eq_table->num_comp_eqs =
+               mlx5_irq_get_num_comp(eq_table->irq_table);
 
        err = create_async_eqs(dev);
        if (err) {
@@ -1020,7 +909,6 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev)
 err_comp_eqs:
        destroy_async_eqs(dev);
 err_async_eqs:
-       free_irq_vectors(dev);
        return err;
 }
 
@@ -1028,7 +916,6 @@ void mlx5_eq_table_destroy(struct mlx5_core_dev *dev)
 {
        destroy_comp_eqs(dev);
        destroy_async_eqs(dev);
-       free_irq_vectors(dev);
 }
 
 int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
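
With the vector bookkeeping moved out of eq.c into the new pci_irq.c, a
generic EQ now has an explicit create/enable/disable/destroy lifecycle.
A skeletal consumer mirroring the ODP page-fault EQ above, reusing the
hypothetical demo_eq wrapper from the earlier sketch (event mask and EQ
size are placeholders):

static int demo_eq_setup(struct mlx5_core_dev *mdev, struct demo_eq *eq)
{
	struct mlx5_eq_param param = {
		.irq_index = 0,		/* async EQs must share index 0 */
		.mask = 1ull << MLX5_EVENT_TYPE_CMD,
		.nent = 64,
	};
	int err;

	eq->irq_nb.notifier_call = demo_eq_int;
	eq->core = mlx5_eq_create_generic(mdev, &param);
	if (IS_ERR(eq->core))
		return PTR_ERR(eq->core);

	err = mlx5_eq_enable(mdev, eq->core, &eq->irq_nb);
	if (err)
		mlx5_eq_destroy_generic(mdev, eq->core);
	return err;
}

static void demo_eq_teardown(struct mlx5_core_dev *mdev, struct demo_eq *eq)
{
	mlx5_eq_disable(mdev, eq->core, &eq->irq_nb);	/* before destroy */
	mlx5_eq_destroy_generic(mdev, eq->core);
}
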
index 5414e8f..67e7697 100644
@@ -134,6 +134,30 @@ static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport,
        return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 }
 
+int mlx5_eswitch_modify_esw_vport_context(struct mlx5_eswitch *esw, u16 vport,
+                                         void *in, int inlen)
+{
+       return modify_esw_vport_context_cmd(esw->dev, vport, in, inlen);
+}
+
+static int query_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport,
+                                      void *out, int outlen)
+{
+       u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {};
+
+       MLX5_SET(query_esw_vport_context_in, in, opcode,
+                MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT);
+       MLX5_SET(query_esw_vport_context_in, in, vport_number, vport);
+       MLX5_SET(query_esw_vport_context_in, in, other_vport, 1);
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+}
+
+int mlx5_eswitch_query_esw_vport_context(struct mlx5_eswitch *esw, u16 vport,
+                                        void *out, int outlen)
+{
+       return query_esw_vport_context_cmd(esw->dev, vport, out, outlen);
+}
+
 static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport,
                                  u16 vlan, u8 qos, u8 set_flags)
 {
@@ -939,7 +963,7 @@ int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw,
                  vport->vport, MLX5_CAP_ESW_EGRESS_ACL(dev, log_max_ft_size));
 
        root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_EGRESS,
-                                                   vport->vport);
+                       mlx5_eswitch_vport_num_to_index(esw, vport->vport));
        if (!root_ns) {
                esw_warn(dev, "Failed to get E-Switch egress flow namespace for vport (%d)\n", vport->vport);
                return -EOPNOTSUPP;
@@ -1057,7 +1081,7 @@ int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw,
                  vport->vport, MLX5_CAP_ESW_INGRESS_ACL(dev, log_max_ft_size));
 
        root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS,
-                                                   vport->vport);
+                       mlx5_eswitch_vport_num_to_index(esw, vport->vport));
        if (!root_ns) {
                esw_warn(dev, "Failed to get E-Switch ingress flow namespace for vport (%d)\n", vport->vport);
                return -EOPNOTSUPP;
@@ -1168,6 +1192,8 @@ void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw,
 
        vport->ingress.drop_rule = NULL;
        vport->ingress.allow_rule = NULL;
+
+       esw_vport_del_ingress_acl_modify_metadata(esw, vport);
 }
 
 void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw,
@@ -1686,10 +1712,9 @@ static int eswitch_vport_event(struct notifier_block *nb,
        return NOTIFY_OK;
 }
 
-static int query_esw_functions(struct mlx5_core_dev *dev,
-                              u32 *out, int outlen)
+int mlx5_esw_query_functions(struct mlx5_core_dev *dev, u32 *out, int outlen)
 {
-       u32 in[MLX5_ST_SZ_DW(query_esw_functions_in)] = {0};
+       u32 in[MLX5_ST_SZ_DW(query_esw_functions_in)] = {};
 
        MLX5_SET(query_esw_functions_in, in, opcode,
                 MLX5_CMD_OP_QUERY_ESW_FUNCTIONS);
@@ -1697,22 +1722,6 @@ static int query_esw_functions(struct mlx5_core_dev *dev,
        return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
 }
 
-int mlx5_esw_query_functions(struct mlx5_core_dev *dev, u16 *num_vfs)
-{
-       u32 out[MLX5_ST_SZ_DW(query_esw_functions_out)] = {0};
-       int err;
-
-       err = query_esw_functions(dev, out, sizeof(out));
-       if (err)
-               return err;
-
-       *num_vfs = MLX5_GET(query_esw_functions_out, out,
-                           host_params_context.host_num_of_vfs);
-       esw_debug(dev, "host_num_of_vfs=%d\n", *num_vfs);
-
-       return 0;
-}
-
 /* Public E-Switch API */
 #define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev))
 
@@ -1720,7 +1729,6 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
 {
        struct mlx5_vport *vport;
        int total_nvports = 0;
-       u16 vf_nvports = 0;
        int err;
        int i, enabled_events;
 
@@ -1739,15 +1747,10 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
        esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode);
 
        if (mode == SRIOV_OFFLOADS) {
-               if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
-                       err = mlx5_esw_query_functions(esw->dev, &vf_nvports);
-                       if (err)
-                               return err;
+               if (mlx5_core_is_ecpf_esw_manager(esw->dev))
                        total_nvports = esw->total_vports;
-               } else {
-                       vf_nvports = nvfs;
+               else
                        total_nvports = nvfs + MLX5_SPECIAL_VPORTS(esw->dev);
-               }
        }
 
        esw->mode = mode;
@@ -1761,7 +1764,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
        } else {
                mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH);
                mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
-               err = esw_offloads_init(esw, vf_nvports, total_nvports);
+               err = esw_offloads_init(esw, nvfs, total_nvports);
        }
 
        if (err)
@@ -2480,6 +2483,17 @@ u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw)
 }
 EXPORT_SYMBOL_GPL(mlx5_eswitch_mode);
 
+enum devlink_eswitch_encap_mode
+mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev)
+{
+       struct mlx5_eswitch *esw;
+
+       esw = dev->priv.eswitch;
+       return ESW_ALLOWED(esw) ? esw->offloads.encap :
+               DEVLINK_ESWITCH_ENCAP_MODE_NONE;
+}
+EXPORT_SYMBOL(mlx5_eswitch_get_encap_mode);
+
 bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1)
 {
        if ((dev0->priv.eswitch->mode == SRIOV_NONE &&
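The ingress side of item 1 is only referenced in these hunks: the new
modify_metadata_id and modify_metadata_rule fields and the
esw_vport_del_ingress_acl_modify_metadata() declaration appear in the
eswitch.h hunk below, while the add-side helper lives in
eswitch_offloads.c, whose hunks are not shown in this view. A simplified,
assumed sketch of how the ingress ACL could stamp the source vport into
metadata_reg_c_0, which the rep rules in the main.c hunk above then match
on (demo_stamp_vport_metadata is hypothetical):

static int demo_stamp_vport_metadata(struct mlx5_eswitch *esw,
				     struct mlx5_vport *vport)
{
	u8 action[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)] = {};

	/* modify-header action: metadata_reg_c_0 = per-vport match value */
	MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
	MLX5_SET(set_action_in, action, field,
		 MLX5_ACTION_IN_FIELD_METADATA_REG_C_0);
	MLX5_SET(set_action_in, action, data,
		 mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport));

	return mlx5_modify_header_alloc(esw->dev,
					MLX5_FLOW_NAMESPACE_ESW_INGRESS,
					1, action,
					&vport->ingress.modify_metadata_id);
}
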
index 849a628..b6472cf 100644
@@ -68,6 +68,8 @@ struct vport_ingress {
        struct mlx5_flow_group *allow_spoofchk_only_grp;
        struct mlx5_flow_group *allow_untagged_only_grp;
        struct mlx5_flow_group *drop_grp;
+       int modify_metadata_id;
+       struct mlx5_flow_handle  *modify_metadata_rule;
        struct mlx5_flow_handle  *allow_rule;
        struct mlx5_flow_handle  *drop_rule;
        struct mlx5_fc           *drop_counter;
@@ -178,7 +180,7 @@ struct mlx5_esw_offload {
        const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES];
        u8 inline_mode;
        u64 num_flows;
-       u8 encap;
+       enum devlink_eswitch_encap_mode encap;
 };
 
 /* E-Switch MC FDB table hash node */
@@ -198,6 +200,10 @@ struct mlx5_esw_functions {
        u16                     num_vfs;
 };
 
+enum {
+       MLX5_ESWITCH_VPORT_MATCH_METADATA = BIT(0),
+};
+
 struct mlx5_eswitch {
        struct mlx5_core_dev    *dev;
        struct mlx5_nb          nb;
@@ -205,6 +211,7 @@ struct mlx5_eswitch {
        struct hlist_head       mc_table[MLX5_L2_ADDR_HASH_SIZE];
        struct workqueue_struct *work_queue;
        struct mlx5_vport       *vports;
+       u32 flags;
        int                     total_vports;
        int                     enabled_vports;
        /* Synchronize between vport change events
@@ -242,6 +249,8 @@ void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw,
                                  struct mlx5_vport *vport);
 void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw,
                                   struct mlx5_vport *vport);
+void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
+                                              struct mlx5_vport *vport);
 
 /* E-Switch API */
 int mlx5_eswitch_init(struct mlx5_core_dev *dev);
@@ -269,6 +278,11 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
                                 struct ifla_vf_stats *vf_stats);
 void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule);
 
+int mlx5_eswitch_modify_esw_vport_context(struct mlx5_eswitch *esw, u16 vport,
+                                         void *in, int inlen);
+int mlx5_eswitch_query_esw_vport_context(struct mlx5_eswitch *esw, u16 vport,
+                                        void *out, int outlen);
+
 struct mlx5_flow_spec;
 struct mlx5_esw_flow_attr;
 struct mlx5_termtbl_handle;
@@ -379,9 +393,11 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
                                         struct netlink_ext_ack *extack);
 int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode);
 int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode);
-int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap,
+int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
+                                       enum devlink_eswitch_encap_mode encap,
                                        struct netlink_ext_ack *extack);
-int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap);
+int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink,
+                                       enum devlink_eswitch_encap_mode *encap);
 void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type);
 
 int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
@@ -409,7 +425,7 @@ bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0,
 bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0,
                               struct mlx5_core_dev *dev1);
 
-int mlx5_esw_query_functions(struct mlx5_core_dev *dev, u16 *num_vfs);
+int mlx5_esw_query_functions(struct mlx5_core_dev *dev, u32 *out, int outlen);
 
 #define MLX5_DEBUG_ESWITCH_MASK BIT(3)
 
@@ -528,6 +544,8 @@ void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw);
 struct mlx5_vport *__must_check
 mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num);
 
+bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num);
+
 #else  /* CONFIG_MLX5_ESWITCH */
 /* eswitch API stubs */
 static inline int  mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
@@ -536,6 +554,11 @@ static inline int  mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs,
 static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {}
 static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; }
 static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; }
+static inline int
+mlx5_esw_query_functions(struct mlx5_core_dev *dev, u32 *out, int outlen)
+{
+       return -EOPNOTSUPP;
+}
 
 #define FDB_MAX_CHAIN 1
 #define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1)
index 060de01..af08e06 100644 (file)
@@ -88,6 +88,53 @@ u16 mlx5_eswitch_get_prio_range(struct mlx5_eswitch *esw)
        return 1;
 }
 
+static void
+mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw,
+                                 struct mlx5_flow_spec *spec,
+                                 struct mlx5_esw_flow_attr *attr)
+{
+       void *misc2;
+       void *misc;
+
+       /* Use metadata matching because a vport is not represented by a
+        * single VHCA in dual-port RoCE mode, and matching on the source
+        * vport may fail.
+        */
+       if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+               misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2);
+               MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0,
+                        mlx5_eswitch_get_vport_metadata_for_match(attr->in_mdev->priv.eswitch,
+                                                                  attr->in_rep->vport));
+
+               misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
+               MLX5_SET_TO_ONES(fte_match_set_misc2, misc2, metadata_reg_c_0);
+
+               spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
+               misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+               if (memchr_inv(misc, 0, MLX5_ST_SZ_BYTES(fte_match_set_misc)))
+                       spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+       } else {
+               misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+               MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport);
+
+               if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+                       MLX5_SET(fte_match_set_misc, misc,
+                                source_eswitch_owner_vhca_id,
+                                MLX5_CAP_GEN(attr->in_mdev, vhca_id));
+
+               misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+               MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+               if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+                       MLX5_SET_TO_ONES(fte_match_set_misc, misc,
+                                        source_eswitch_owner_vhca_id);
+
+               spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+       }
+
+       if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source) &&
+           attr->in_rep->vport == MLX5_VPORT_UPLINK)
+               spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
+}
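
A minimal caller-side sketch of the helper above, assuming only the flow-spec
allocation pattern already used elsewhere in this patch (the surrounding
function body is illustrative, not part of the change):

	struct mlx5_flow_spec *spec;

	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec)
		return ERR_PTR(-ENOMEM);

	/* Fills either misc_parameters_2 (metadata mode) or misc_parameters
	 * (legacy source-port mode) and updates spec->match_criteria_enable
	 * accordingly.
	 */
	mlx5_eswitch_set_rule_source_port(esw, spec, attr);
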
+
 struct mlx5_flow_handle *
 mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
                                struct mlx5_flow_spec *spec,
@@ -99,7 +146,6 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
        struct mlx5_flow_handle *rule;
        struct mlx5_flow_table *fdb;
        int j, i = 0;
-       void *misc;
 
        if (esw->mode != SRIOV_OFFLOADS)
                return ERR_PTR(-EOPNOTSUPP);
@@ -159,21 +205,8 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
                i++;
        }
 
-       misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
-       MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport);
-
-       if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
-               MLX5_SET(fte_match_set_misc, misc,
-                        source_eswitch_owner_vhca_id,
-                        MLX5_CAP_GEN(attr->in_mdev, vhca_id));
-
-       misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
-       MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
-       if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
-               MLX5_SET_TO_ONES(fte_match_set_misc, misc,
-                                source_eswitch_owner_vhca_id);
+       mlx5_eswitch_set_rule_source_port(esw, spec, attr);
 
-       spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
        if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) {
                if (attr->tunnel_match_level != MLX5_MATCH_NONE)
                        spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
@@ -223,7 +256,6 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
        struct mlx5_flow_table *fast_fdb;
        struct mlx5_flow_table *fwd_fdb;
        struct mlx5_flow_handle *rule;
-       void *misc;
        int i;
 
        fast_fdb = esw_get_prio_table(esw, attr->chain, attr->prio, 0);
@@ -255,25 +287,11 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
        dest[i].ft = fwd_fdb;
        i++;
 
-       misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
-       MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport);
-
-       if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
-               MLX5_SET(fte_match_set_misc, misc,
-                        source_eswitch_owner_vhca_id,
-                        MLX5_CAP_GEN(attr->in_mdev, vhca_id));
+       mlx5_eswitch_set_rule_source_port(esw, spec, attr);
 
-       misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
-       MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
-       if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
-               MLX5_SET_TO_ONES(fte_match_set_misc, misc,
-                                source_eswitch_owner_vhca_id);
-
-       if (attr->match_level == MLX5_MATCH_NONE)
-               spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
-       else
-               spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS |
-                                              MLX5_MATCH_MISC_PARAMETERS;
+       spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+       if (attr->match_level != MLX5_MATCH_NONE)
+               spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
 
        rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i);
 
@@ -570,23 +588,87 @@ void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule)
        mlx5_del_flow_rules(rule);
 }
 
-static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev,
+static int mlx5_eswitch_enable_passing_vport_metadata(struct mlx5_eswitch *esw)
+{
+       u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {};
+       u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {};
+       u8 fdb_to_vport_reg_c_id;
+       int err;
+
+       err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport,
+                                                  out, sizeof(out));
+       if (err)
+               return err;
+
+       fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out,
+                                        esw_vport_context.fdb_to_vport_reg_c_id);
+
+       fdb_to_vport_reg_c_id |= MLX5_FDB_TO_VPORT_REG_C_0;
+       MLX5_SET(modify_esw_vport_context_in, in,
+                esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id);
+
+       MLX5_SET(modify_esw_vport_context_in, in,
+                field_select.fdb_to_vport_reg_c_id, 1);
+
+       return mlx5_eswitch_modify_esw_vport_context(esw, esw->manager_vport,
+                                                    in, sizeof(in));
+}
+
+static int mlx5_eswitch_disable_passing_vport_metadata(struct mlx5_eswitch *esw)
+{
+       u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {};
+       u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {};
+       u8 fdb_to_vport_reg_c_id;
+       int err;
+
+       err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport,
+                                                  out, sizeof(out));
+       if (err)
+               return err;
+
+       fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out,
+                                        esw_vport_context.fdb_to_vport_reg_c_id);
+
+       fdb_to_vport_reg_c_id &= ~MLX5_FDB_TO_VPORT_REG_C_0;
+
+       MLX5_SET(modify_esw_vport_context_in, in,
+                esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id);
+
+       MLX5_SET(modify_esw_vport_context_in, in,
+                field_select.fdb_to_vport_reg_c_id, 1);
+
+       return mlx5_eswitch_modify_esw_vport_context(esw, esw->manager_vport,
+                                                    in, sizeof(in));
+}
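
The enable and disable helpers above differ only in whether
MLX5_FDB_TO_VPORT_REG_C_0 is set or cleared in the queried
fdb_to_vport_reg_c_id before writing it back. A hedged consolidation sketch,
with a hypothetical helper name, built only from the calls used above:

	static int esw_set_passing_vport_metadata(struct mlx5_eswitch *esw,
						  bool enable)
	{
		u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {};
		u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {};
		u8 curr;
		int err;

		err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport,
							   out, sizeof(out));
		if (err)
			return err;

		/* Read-modify-write of the single REG_C_0 propagation bit */
		curr = MLX5_GET(query_esw_vport_context_out, out,
				esw_vport_context.fdb_to_vport_reg_c_id);
		if (enable)
			curr |= MLX5_FDB_TO_VPORT_REG_C_0;
		else
			curr &= ~MLX5_FDB_TO_VPORT_REG_C_0;

		MLX5_SET(modify_esw_vport_context_in, in,
			 esw_vport_context.fdb_to_vport_reg_c_id, curr);
		MLX5_SET(modify_esw_vport_context_in, in,
			 field_select.fdb_to_vport_reg_c_id, 1);

		return mlx5_eswitch_modify_esw_vport_context(esw, esw->manager_vport,
							     in, sizeof(in));
	}
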
+
+static void peer_miss_rules_setup(struct mlx5_eswitch *esw,
+                                 struct mlx5_core_dev *peer_dev,
                                  struct mlx5_flow_spec *spec,
                                  struct mlx5_flow_destination *dest)
 {
-       void *misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
-                                 misc_parameters);
+       void *misc;
 
-       MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
-                MLX5_CAP_GEN(peer_dev, vhca_id));
+       if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+               misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+                                   misc_parameters_2);
+               MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0);
 
-       spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+               spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+       } else {
+               misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+                                   misc_parameters);
 
-       misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
-                           misc_parameters);
-       MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
-       MLX5_SET_TO_ONES(fte_match_set_misc, misc,
-                        source_eswitch_owner_vhca_id);
+               MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
+                        MLX5_CAP_GEN(peer_dev, vhca_id));
+
+               spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+
+               misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+                                   misc_parameters);
+               MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+               MLX5_SET_TO_ONES(fte_match_set_misc, misc,
+                                source_eswitch_owner_vhca_id);
+       }
 
        dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
        dest->vport.num = peer_dev->priv.eswitch->manager_vport;
@@ -594,6 +676,26 @@ static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev,
        dest->vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
 }
 
+static void esw_set_peer_miss_rule_source_port(struct mlx5_eswitch *esw,
+                                              struct mlx5_eswitch *peer_esw,
+                                              struct mlx5_flow_spec *spec,
+                                              u16 vport)
+{
+       void *misc;
+
+       if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+               misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+                                   misc_parameters_2);
+               MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+                        mlx5_eswitch_get_vport_metadata_for_match(peer_esw,
+                                                                  vport));
+       } else {
+               misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+                                   misc_parameters);
+               MLX5_SET(fte_match_set_misc, misc, source_port, vport);
+       }
+}
+
 static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
                                       struct mlx5_core_dev *peer_dev)
 {
@@ -611,7 +713,7 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
        if (!spec)
                return -ENOMEM;
 
-       peer_miss_rules_setup(peer_dev, spec, &dest);
+       peer_miss_rules_setup(esw, peer_dev, spec, &dest);
 
        flows = kvzalloc(nvports * sizeof(*flows), GFP_KERNEL);
        if (!flows) {
@@ -624,7 +726,9 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
                            misc_parameters);
 
        if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
-               MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_PF);
+               esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch,
+                                                  spec, MLX5_VPORT_PF);
+
                flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
                                           spec, &flow_act, &dest, 1);
                if (IS_ERR(flow)) {
@@ -646,7 +750,10 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
        }
 
        mlx5_esw_for_each_vf_vport_num(esw, i, mlx5_core_max_vfs(esw->dev)) {
-               MLX5_SET(fte_match_set_misc, misc, source_port, i);
+               esw_set_peer_miss_rule_source_port(esw,
+                                                  peer_dev->priv.eswitch,
+                                                  spec, i);
+
                flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
                                           spec, &flow_act, &dest, 1);
                if (IS_ERR(flow)) {
@@ -930,6 +1037,30 @@ static void esw_destroy_offloads_fast_fdb_tables(struct mlx5_eswitch *esw)
 #define MAX_PF_SQ 256
 #define MAX_SQ_NVPORTS 32
 
+static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw,
+                                          u32 *flow_group_in)
+{
+       void *match_criteria = MLX5_ADDR_OF(create_flow_group_in,
+                                           flow_group_in,
+                                           match_criteria);
+
+       if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+               MLX5_SET(create_flow_group_in, flow_group_in,
+                        match_criteria_enable,
+                        MLX5_MATCH_MISC_PARAMETERS_2);
+
+               MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+                                misc_parameters_2.metadata_reg_c_0);
+       } else {
+               MLX5_SET(create_flow_group_in, flow_group_in,
+                        match_criteria_enable,
+                        MLX5_MATCH_MISC_PARAMETERS);
+
+               MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+                                misc_parameters.source_port);
+       }
+}
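
Both flow-group creators below reset flow_group_in and then call the helper,
so the source-port match criteria stay consistent between metadata and legacy
modes. The pattern, as used for the peer-miss and vport-rx groups later in
this patch:

	memset(flow_group_in, 0, inlen);
	esw_set_flow_group_source_port(esw, flow_group_in);
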
+
 static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
 {
        int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
@@ -1027,19 +1158,21 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
 
        /* create peer esw miss group */
        memset(flow_group_in, 0, inlen);
-       MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
-                MLX5_MATCH_MISC_PARAMETERS);
 
-       match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
-                                     match_criteria);
+       esw_set_flow_group_source_port(esw, flow_group_in);
+
+       if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+               match_criteria = MLX5_ADDR_OF(create_flow_group_in,
+                                             flow_group_in,
+                                             match_criteria);
 
-       MLX5_SET_TO_ONES(fte_match_param, match_criteria,
-                        misc_parameters.source_port);
-       MLX5_SET_TO_ONES(fte_match_param, match_criteria,
-                        misc_parameters.source_eswitch_owner_vhca_id);
+               MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+                                misc_parameters.source_eswitch_owner_vhca_id);
+
+               MLX5_SET(create_flow_group_in, flow_group_in,
+                        source_eswitch_owner_vhca_id_valid, 1);
+       }
 
-       MLX5_SET(create_flow_group_in, flow_group_in,
-                source_eswitch_owner_vhca_id_valid, 1);
        MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
        MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
                 ix + esw->total_vports - 1);
@@ -1153,7 +1286,6 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw, int nvports)
        int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
        struct mlx5_flow_group *g;
        u32 *flow_group_in;
-       void *match_criteria, *misc;
        int err = 0;
 
        nvports = nvports + MLX5_ESW_MISS_FLOWS;
@@ -1163,12 +1295,8 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw, int nvports)
 
        /* create vport rx group */
        memset(flow_group_in, 0, inlen);
-       MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
-                MLX5_MATCH_MISC_PARAMETERS);
 
-       match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
-       misc = MLX5_ADDR_OF(fte_match_param, match_criteria, misc_parameters);
-       MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+       esw_set_flow_group_source_port(esw, flow_group_in);
 
        MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
        MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, nvports - 1);
@@ -1207,13 +1335,24 @@ mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport,
                goto out;
        }
 
-       misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
-       MLX5_SET(fte_match_set_misc, misc, source_port, vport);
+       if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+               misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2);
+               MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+                        mlx5_eswitch_get_vport_metadata_for_match(esw, vport));
 
-       misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
-       MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+               misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
+               MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0);
 
-       spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+               spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+       } else {
+               misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+               MLX5_SET(fte_match_set_misc, misc, source_port, vport);
+
+               misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+               MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+
+               spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+       }
 
        flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
        flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec,
@@ -1449,34 +1588,13 @@ err_reps:
        return err;
 }
 
-static int __load_reps_all_vport(struct mlx5_eswitch *esw, int nvports,
-                                u8 rep_type)
-{
-       int err;
-
-       /* Special vports must be loaded first. */
-       err = __load_reps_special_vport(esw, rep_type);
-       if (err)
-               return err;
-
-       err = __load_reps_vf_vport(esw, nvports, rep_type);
-       if (err)
-               goto err_vfs;
-
-       return 0;
-
-err_vfs:
-       __unload_reps_special_vport(esw, rep_type);
-       return err;
-}
-
-static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw, int nvports)
+static int esw_offloads_load_special_vport(struct mlx5_eswitch *esw)
 {
        u8 rep_type = 0;
        int err;
 
        for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) {
-               err = __load_reps_all_vport(esw, nvports, rep_type);
+               err = __load_reps_special_vport(esw, rep_type);
                if (err)
                        goto err_reps;
        }
@@ -1485,7 +1603,7 @@ static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw, int nvports)
 
 err_reps:
        while (rep_type-- > 0)
-               __unload_reps_all_vport(esw, nvports, rep_type);
+               __unload_reps_special_vport(esw, rep_type);
        return err;
 }
 
@@ -1521,6 +1639,10 @@ static int mlx5_esw_offloads_devcom_event(int event,
 
        switch (event) {
        case ESW_OFFLOADS_DEVCOM_PAIR:
+               if (mlx5_eswitch_vport_match_metadata_enabled(esw) !=
+                   mlx5_eswitch_vport_match_metadata_enabled(peer_esw))
+                       break;
+
                err = mlx5_esw_offloads_pair(esw, peer_esw);
                if (err)
                        goto err_out;
@@ -1589,32 +1711,16 @@ static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw)
 static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw,
                                             struct mlx5_vport *vport)
 {
-       struct mlx5_core_dev *dev = esw->dev;
        struct mlx5_flow_act flow_act = {0};
        struct mlx5_flow_spec *spec;
        int err = 0;
 
        /* For prio tag mode, there is only one FTE:
-        * 1) Untagged packets - push prio tag VLAN, allow
+        * 1) Untagged packets - push prio tag VLAN and modify metadata if
+        * required, allow
         * Unmatched traffic is allowed by default
         */
 
-       if (!MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support))
-               return -EOPNOTSUPP;
-
-       esw_vport_cleanup_ingress_rules(esw, vport);
-
-       err = esw_vport_enable_ingress_acl(esw, vport);
-       if (err) {
-               mlx5_core_warn(esw->dev,
-                              "failed to enable prio tag ingress acl (%d) on vport[%d]\n",
-                              err, vport->vport);
-               return err;
-       }
-
-       esw_debug(esw->dev,
-                 "vport[%d] configure ingress rules\n", vport->vport);
-
        spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
        if (!spec) {
                err = -ENOMEM;
@@ -1630,6 +1736,12 @@ static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw,
        flow_act.vlan[0].ethtype = ETH_P_8021Q;
        flow_act.vlan[0].vid = 0;
        flow_act.vlan[0].prio = 0;
+
+       if (vport->ingress.modify_metadata_rule) {
+               flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+               flow_act.modify_id = vport->ingress.modify_metadata_id;
+       }
+
        vport->ingress.allow_rule =
                mlx5_add_flow_rules(vport->ingress.acl, spec,
                                    &flow_act, NULL, 0);
@@ -1650,6 +1762,58 @@ out_no_mem:
        return err;
 }
 
+static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
+                                                    struct mlx5_vport *vport)
+{
+       u8 action[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)] = {};
+       struct mlx5_flow_act flow_act = {};
+       struct mlx5_flow_spec spec = {};
+       int err = 0;
+
+       MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
+       MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_C_0);
+       MLX5_SET(set_action_in, action, data,
+                mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport));
+
+       err = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+                                      1, action, &vport->ingress.modify_metadata_id);
+       if (err) {
+               esw_warn(esw->dev,
+                        "failed to alloc modify header for vport %d ingress acl (%d)\n",
+                        vport->vport, err);
+               return err;
+       }
+
+       flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+       flow_act.modify_id = vport->ingress.modify_metadata_id;
+       vport->ingress.modify_metadata_rule = mlx5_add_flow_rules(vport->ingress.acl,
+                                                                 &spec, &flow_act, NULL, 0);
+       if (IS_ERR(vport->ingress.modify_metadata_rule)) {
+               err = PTR_ERR(vport->ingress.modify_metadata_rule);
+               esw_warn(esw->dev,
+                        "failed to add setting metadata rule for vport %d ingress acl, err(%d)\n",
+                        vport->vport, err);
+               vport->ingress.modify_metadata_rule = NULL;
+               goto out;
+       }
+
+out:
+       if (err)
+               mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata_id);
+       return err;
+}
+
+void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
+                                              struct mlx5_vport *vport)
+{
+       if (vport->ingress.modify_metadata_rule) {
+               mlx5_del_flow_rules(vport->ingress.modify_metadata_rule);
+               mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata_id);
+
+               vport->ingress.modify_metadata_rule = NULL;
+       }
+}
+
 static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw,
                                            struct mlx5_vport *vport)
 {
@@ -1657,6 +1821,9 @@ static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw,
        struct mlx5_flow_spec *spec;
        int err = 0;
 
+       if (!MLX5_CAP_GEN(esw->dev, prio_tag_required))
+               return 0;
+
        /* For prio tag mode, there is only one FTE:
         * 1) prio tag packets - pop the prio tag VLAN, allow
         * Unmatched traffic is allowed by default
@@ -1710,27 +1877,98 @@ out_no_mem:
        return err;
 }
 
-static int esw_prio_tag_acls_config(struct mlx5_eswitch *esw, int nvports)
+static int esw_vport_ingress_common_config(struct mlx5_eswitch *esw,
+                                          struct mlx5_vport *vport)
 {
-       struct mlx5_vport *vport = NULL;
-       int i, j;
        int err;
 
-       mlx5_esw_for_each_vf_vport(esw, i, vport, nvports) {
+       if (!mlx5_eswitch_vport_match_metadata_enabled(esw) &&
+           !MLX5_CAP_GEN(esw->dev, prio_tag_required))
+               return 0;
+
+       esw_vport_cleanup_ingress_rules(esw, vport);
+
+       err = esw_vport_enable_ingress_acl(esw, vport);
+       if (err) {
+               esw_warn(esw->dev,
+                        "failed to enable ingress acl (%d) on vport[%d]\n",
+                        err, vport->vport);
+               return err;
+       }
+
+       esw_debug(esw->dev,
+                 "vport[%d] configure ingress rules\n", vport->vport);
+
+       if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+               err = esw_vport_add_ingress_acl_modify_metadata(esw, vport);
+               if (err)
+                       goto out;
+       }
+
+       if (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
+           mlx5_eswitch_is_vf_vport(esw, vport->vport)) {
                err = esw_vport_ingress_prio_tag_config(esw, vport);
                if (err)
-                       goto err_ingress;
-               err = esw_vport_egress_prio_tag_config(esw, vport);
+                       goto out;
+       }
+
+out:
+       if (err)
+               esw_vport_disable_ingress_acl(esw, vport);
+       return err;
+}
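
Worth noting for the common ingress setup above:
esw_vport_add_ingress_acl_modify_metadata() runs before
esw_vport_ingress_prio_tag_config() so that the prio-tag FTE can chain the
already-allocated modify header, which is exactly what the prio-tag path
earlier in this patch does:

	if (vport->ingress.modify_metadata_rule) {
		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
		flow_act.modify_id = vport->ingress.modify_metadata_id;
	}
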
+
+static bool
+esw_check_vport_match_metadata_supported(const struct mlx5_eswitch *esw)
+{
+       if (!MLX5_CAP_ESW(esw->dev, esw_uplink_ingress_acl))
+               return false;
+
+       if (!(MLX5_CAP_ESW_FLOWTABLE(esw->dev, fdb_to_vport_reg_c_id) &
+             MLX5_FDB_TO_VPORT_REG_C_0))
+               return false;
+
+       if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source))
+               return false;
+
+       if (mlx5_core_is_ecpf_esw_manager(esw->dev) ||
+           mlx5_ecpf_vport_exists(esw->dev))
+               return false;
+
+       return true;
+}
+
+static int esw_create_offloads_acl_tables(struct mlx5_eswitch *esw)
+{
+       struct mlx5_vport *vport;
+       int i, j;
+       int err;
+
+       if (esw_check_vport_match_metadata_supported(esw))
+               esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA;
+
+       mlx5_esw_for_all_vports(esw, i, vport) {
+               err = esw_vport_ingress_common_config(esw, vport);
                if (err)
-                       goto err_egress;
+                       goto err_ingress;
+
+               if (mlx5_eswitch_is_vf_vport(esw, vport->vport)) {
+                       err = esw_vport_egress_prio_tag_config(esw, vport);
+                       if (err)
+                               goto err_egress;
+               }
        }
 
+       if (mlx5_eswitch_vport_match_metadata_enabled(esw))
+               esw_info(esw->dev, "Use metadata reg_c as source vport to match\n");
+
        return 0;
 
 err_egress:
        esw_vport_disable_ingress_acl(esw, vport);
 err_ingress:
-       mlx5_esw_for_each_vf_vport_reverse(esw, j, vport, i - 1) {
+       for (j = MLX5_VPORT_PF; j < i; j++) {
+               vport = &esw->vports[j];
                esw_vport_disable_egress_acl(esw, vport);
                esw_vport_disable_ingress_acl(esw, vport);
        }
@@ -1738,15 +1976,17 @@ err_ingress:
        return err;
 }
 
-static void esw_prio_tag_acls_cleanup(struct mlx5_eswitch *esw)
+static void esw_destroy_offloads_acl_tables(struct mlx5_eswitch *esw)
 {
        struct mlx5_vport *vport;
        int i;
 
-       mlx5_esw_for_each_vf_vport(esw, i, vport, esw->dev->priv.sriov.num_vfs) {
+       mlx5_esw_for_all_vports(esw, i, vport) {
                esw_vport_disable_egress_acl(esw, vport);
                esw_vport_disable_ingress_acl(esw, vport);
        }
+
+       esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA;
 }
 
 static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int vf_nvports,
@@ -1757,15 +1997,13 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int vf_nvports,
        memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb));
        mutex_init(&esw->fdb_table.offloads.fdb_prio_lock);
 
-       if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) {
-               err = esw_prio_tag_acls_config(esw, vf_nvports);
-               if (err)
-                       return err;
-       }
+       err = esw_create_offloads_acl_tables(esw);
+       if (err)
+               return err;
 
        err = esw_create_offloads_fdb_tables(esw, nvports);
        if (err)
-               return err;
+               goto create_fdb_err;
 
        err = esw_create_offloads_table(esw, nvports);
        if (err)
@@ -1783,6 +2021,9 @@ create_fg_err:
 create_ft_err:
        esw_destroy_offloads_fdb_tables(esw);
 
+create_fdb_err:
+       esw_destroy_offloads_acl_tables(esw);
+
        return err;
 }
 
@@ -1791,12 +2032,12 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw)
        esw_destroy_vport_rx_group(esw);
        esw_destroy_offloads_table(esw);
        esw_destroy_offloads_fdb_tables(esw);
-       if (MLX5_CAP_GEN(esw->dev, prio_tag_required))
-               esw_prio_tag_acls_cleanup(esw);
+       esw_destroy_offloads_acl_tables(esw);
 }
 
 static void esw_functions_changed_event_handler(struct work_struct *work)
 {
+       u32 out[MLX5_ST_SZ_DW(query_esw_functions_out)] = {};
        struct mlx5_host_work *host_work;
        struct mlx5_eswitch *esw;
        u16 num_vfs = 0;
@@ -1805,7 +2046,9 @@ static void esw_functions_changed_event_handler(struct work_struct *work)
        host_work = container_of(work, struct mlx5_host_work, work);
        esw = host_work->esw;
 
-       err = mlx5_esw_query_functions(esw->dev, &num_vfs);
+       err = mlx5_esw_query_functions(esw->dev, out, sizeof(out));
+       num_vfs = MLX5_GET(query_esw_functions_out, out,
+                          host_params_context.host_num_of_vfs);
        if (err || num_vfs == esw->esw_funcs.num_vfs)
                goto out;
 
@@ -1825,6 +2068,21 @@ out:
        kfree(host_work);
 }
 
+static void esw_emulate_event_handler(struct work_struct *work)
+{
+       struct mlx5_host_work *host_work =
+               container_of(work, struct mlx5_host_work, work);
+       struct mlx5_eswitch *esw = host_work->esw;
+       int err;
+
+       if (esw->esw_funcs.num_vfs) {
+               err = esw_offloads_load_vf_reps(esw, esw->esw_funcs.num_vfs);
+               if (err)
+                       esw_warn(esw->dev, "Load vf reps err=%d\n", err);
+       }
+       kfree(host_work);
+}
+
 static int esw_functions_changed_event(struct notifier_block *nb,
                                       unsigned long type, void *data)
 {
@@ -1841,7 +2099,11 @@ static int esw_functions_changed_event(struct notifier_block *nb,
 
        host_work->esw = esw;
 
-       INIT_WORK(&host_work->work, esw_functions_changed_event_handler);
+       if (mlx5_eswitch_is_funcs_handler(esw->dev))
+               INIT_WORK(&host_work->work,
+                         esw_functions_changed_event_handler);
+       else
+               INIT_WORK(&host_work->work, esw_emulate_event_handler);
        queue_work(esw->work_queue, &host_work->work);
 
        return NOTIFY_OK;
@@ -1850,13 +2112,14 @@ static int esw_functions_changed_event(struct notifier_block *nb,
 static void esw_functions_changed_event_init(struct mlx5_eswitch *esw,
                                             u16 vf_nvports)
 {
-       if (!mlx5_eswitch_is_funcs_handler(esw->dev))
-               return;
-
-       MLX5_NB_INIT(&esw->esw_funcs.nb, esw_functions_changed_event,
-                    ESW_FUNCTIONS_CHANGED);
-       mlx5_eq_notifier_register(esw->dev, &esw->esw_funcs.nb);
-       esw->esw_funcs.num_vfs = vf_nvports;
+       if (mlx5_eswitch_is_funcs_handler(esw->dev)) {
+               esw->esw_funcs.num_vfs = 0;
+               MLX5_NB_INIT(&esw->esw_funcs.nb, esw_functions_changed_event,
+                            ESW_FUNCTIONS_CHANGED);
+               mlx5_eq_notifier_register(esw->dev, &esw->esw_funcs.nb);
+       } else {
+               esw->esw_funcs.num_vfs = vf_nvports;
+       }
 }
 
 static void esw_functions_changed_event_cleanup(struct mlx5_eswitch *esw)
@@ -1877,7 +2140,17 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports,
        if (err)
                return err;
 
-       err = esw_offloads_load_all_reps(esw, vf_nvports);
+       if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+               err = mlx5_eswitch_enable_passing_vport_metadata(esw);
+               if (err)
+                       goto err_vport_metadata;
+       }
+
+       /* Only load special vport reps. VF reps will be loaded in the
+        * context of the functions_changed event handler, through a real
+        * or an emulated event.
+        */
+       err = esw_offloads_load_special_vport(esw);
        if (err)
                goto err_reps;
 
@@ -1888,9 +2161,22 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports,
 
        mlx5_rdma_enable_roce(esw->dev);
 
+       /* Trigger the esw_functions_changed event to load VF reps, because
+        * either:
+        * 1. HW does not support the event, so it must be emulated, or
+        * 2. the event was already delivered while num_vfs changed and the
+        *    eswitch was still in legacy mode.
+        */
+       esw_functions_changed_event(&esw->esw_funcs.nb.nb,
+                                   MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED,
+                                   NULL);
+
        return 0;
 
 err_reps:
+       if (mlx5_eswitch_vport_match_metadata_enabled(esw))
+               mlx5_eswitch_disable_passing_vport_metadata(esw);
+err_vport_metadata:
        esw_offloads_steering_cleanup(esw);
        return err;
 }
@@ -1916,18 +2202,12 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw,
 
 void esw_offloads_cleanup(struct mlx5_eswitch *esw)
 {
-       u16 num_vfs;
-
        esw_functions_changed_event_cleanup(esw);
-
-       if (mlx5_eswitch_is_funcs_handler(esw->dev))
-               num_vfs = esw->esw_funcs.num_vfs;
-       else
-               num_vfs = esw->dev->priv.sriov.num_vfs;
-
        mlx5_rdma_disable_roce(esw->dev);
        esw_offloads_devcom_cleanup(esw);
-       esw_offloads_unload_all_reps(esw, num_vfs);
+       esw_offloads_unload_all_reps(esw, esw->esw_funcs.num_vfs);
+       if (mlx5_eswitch_vport_match_metadata_enabled(esw))
+               mlx5_eswitch_disable_passing_vport_metadata(esw);
        esw_offloads_steering_cleanup(esw);
 }
 
@@ -2167,7 +2447,8 @@ out:
        return 0;
 }
 
-int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap,
+int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
+                                       enum devlink_eswitch_encap_mode encap,
                                        struct netlink_ext_ack *extack)
 {
        struct mlx5_core_dev *dev = devlink_priv(devlink);
@@ -2216,7 +2497,8 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap,
        return err;
 }
 
-int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap)
+int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink,
+                                       enum devlink_eswitch_encap_mode *encap)
 {
        struct mlx5_core_dev *dev = devlink_priv(devlink);
        struct mlx5_eswitch *esw = dev->priv.eswitch;
@@ -2295,3 +2577,22 @@ struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
        return mlx5_eswitch_get_rep(esw, vport);
 }
 EXPORT_SYMBOL(mlx5_eswitch_vport_rep);
+
+bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num)
+{
+       return vport_num >= MLX5_VPORT_FIRST_VF &&
+              vport_num <= esw->dev->priv.sriov.max_vfs;
+}
+
+bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw)
+{
+       return !!(esw->flags & MLX5_ESWITCH_VPORT_MATCH_METADATA);
+}
+EXPORT_SYMBOL(mlx5_eswitch_vport_match_metadata_enabled);
+
+u32 mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw,
+                                             u16 vport_num)
+{
+       return ((MLX5_CAP_GEN(esw->dev, vhca_id) & 0xffff) << 16) | vport_num;
+}
+EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match);
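
For reference, a sketch of the 32-bit match value produced by
mlx5_eswitch_get_vport_metadata_for_match() above, derived directly from the
expression in this patch:

	/*
	 *  31              16 15               0
	 * +------------------+------------------+
	 * | vhca_id & 0xffff |     vport_num    |
	 * +------------------+------------------+
	 */
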
index 52c47d3..c76da30 100644 (file)
@@ -636,7 +636,8 @@ static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev,
                                           u8 match_criteria_enable,
                                           const u32 *match_c,
                                           const u32 *match_v,
-                                          struct mlx5_flow_act *flow_act)
+                                          struct mlx5_flow_act *flow_act,
+                                          struct mlx5_flow_context *flow_context)
 {
        const void *outer_c = MLX5_ADDR_OF(fte_match_param, match_c,
                                           outer_headers);
@@ -655,7 +656,7 @@ static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev,
            (match_criteria_enable &
             ~(MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS)) ||
            (flow_act->action & ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | MLX5_FLOW_CONTEXT_ACTION_ALLOW)) ||
-            (flow_act->flags & FLOW_ACT_HAS_TAG))
+            (flow_context->flags & FLOW_CONTEXT_HAS_TAG))
                return false;
 
        return true;
@@ -767,7 +768,8 @@ mlx5_fpga_ipsec_fs_create_sa_ctx(struct mlx5_core_dev *mdev,
                                            fg->mask.match_criteria_enable,
                                            fg->mask.match_criteria,
                                            fte->val,
-                                           &fte->action))
+                                           &fte->action,
+                                           &fte->flow_context))
                return ERR_PTR(-EINVAL);
        else if (!mlx5_is_fpga_ipsec_rule(mdev,
                                          fg->mask.match_criteria_enable,
index bb24c37..7ac1249 100644 (file)
@@ -396,7 +396,11 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
        in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context);
        MLX5_SET(flow_context, in_flow_context, group_id, group_id);
 
-       MLX5_SET(flow_context, in_flow_context, flow_tag, fte->action.flow_tag);
+       MLX5_SET(flow_context, in_flow_context, flow_tag,
+                fte->flow_context.flow_tag);
+       MLX5_SET(flow_context, in_flow_context, flow_source,
+                fte->flow_context.flow_source);
+
        MLX5_SET(flow_context, in_flow_context, extended_destination,
                 extended_dest);
        if (extended_dest) {
@@ -771,6 +775,10 @@ int mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
                max_actions = MLX5_CAP_FLOWTABLE_NIC_TX(dev, max_modify_header_actions);
                table_type = FS_FT_NIC_TX;
                break;
+       case MLX5_FLOW_NAMESPACE_ESW_INGRESS:
+               max_actions = MLX5_CAP_ESW_INGRESS_ACL(dev, max_modify_header_actions);
+               table_type = FS_FT_ESW_INGRESS_ACL;
+               break;
        default:
                return -EOPNOTSUPP;
        }
index fe76c6f..585e7ad 100644 (file)
@@ -584,7 +584,7 @@ err_ida_remove:
 }
 
 static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft,
-                               u32 *match_value,
+                               struct mlx5_flow_spec *spec,
                                struct mlx5_flow_act *flow_act)
 {
        struct mlx5_flow_steering *steering = get_steering(&ft->node);
@@ -594,9 +594,10 @@ static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft,
        if (!fte)
                return ERR_PTR(-ENOMEM);
 
-       memcpy(fte->val, match_value, sizeof(fte->val));
+       memcpy(fte->val, &spec->match_value, sizeof(fte->val));
        fte->node.type =  FS_TYPE_FLOW_ENTRY;
        fte->action = *flow_act;
+       fte->flow_context = spec->flow_context;
 
        tree_init_node(&fte->node, NULL, del_sw_fte);
 
@@ -1430,7 +1431,9 @@ static bool check_conflicting_actions(u32 action1, u32 action2)
        return false;
 }
 
-static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act *flow_act)
+static int check_conflicting_ftes(struct fs_fte *fte,
+                                 const struct mlx5_flow_context *flow_context,
+                                 const struct mlx5_flow_act *flow_act)
 {
        if (check_conflicting_actions(flow_act->action, fte->action.action)) {
                mlx5_core_warn(get_dev(&fte->node),
@@ -1438,12 +1441,12 @@ static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act
                return -EEXIST;
        }
 
-       if ((flow_act->flags & FLOW_ACT_HAS_TAG) &&
-           fte->action.flow_tag != flow_act->flow_tag) {
+       if ((flow_context->flags & FLOW_CONTEXT_HAS_TAG) &&
+           fte->flow_context.flow_tag != flow_context->flow_tag) {
                mlx5_core_warn(get_dev(&fte->node),
                               "FTE flow tag %u already exists with different flow tag %u\n",
-                              fte->action.flow_tag,
-                              flow_act->flow_tag);
+                              fte->flow_context.flow_tag,
+                              flow_context->flow_tag);
                return -EEXIST;
        }
 
@@ -1451,7 +1454,7 @@ static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act
 }
 
 static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
-                                           u32 *match_value,
+                                           struct mlx5_flow_spec *spec,
                                            struct mlx5_flow_act *flow_act,
                                            struct mlx5_flow_destination *dest,
                                            int dest_num,
@@ -1462,7 +1465,7 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
        int i;
        int ret;
 
-       ret = check_conflicting_ftes(fte, flow_act);
+       ret = check_conflicting_ftes(fte, &spec->flow_context, flow_act);
        if (ret)
                return ERR_PTR(ret);
 
@@ -1637,7 +1640,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft,
        u64  version;
        int err;
 
-       fte = alloc_fte(ft, spec->match_value, flow_act);
+       fte = alloc_fte(ft, spec, flow_act);
        if (IS_ERR(fte))
                return  ERR_PTR(-ENOMEM);
 
@@ -1653,8 +1656,7 @@ search_again_locked:
                fte_tmp = lookup_fte_locked(g, spec->match_value, take_write);
                if (!fte_tmp)
                        continue;
-               rule = add_rule_fg(g, spec->match_value,
-                                  flow_act, dest, dest_num, fte_tmp);
+               rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte_tmp);
                up_write_ref_node(&fte_tmp->node, false);
                tree_put_node(&fte_tmp->node, false);
                kmem_cache_free(steering->ftes_cache, fte);
@@ -1701,8 +1703,7 @@ skip_search:
 
                nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD);
                up_write_ref_node(&g->node, false);
-               rule = add_rule_fg(g, spec->match_value,
-                                  flow_act, dest, dest_num, fte);
+               rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte);
                up_write_ref_node(&fte->node, false);
                tree_put_node(&fte->node, false);
                return rule;
@@ -1788,7 +1789,7 @@ search_again_locked:
        if (err)
                goto err_release_fg;
 
-       fte = alloc_fte(ft, spec->match_value, flow_act);
+       fte = alloc_fte(ft, spec, flow_act);
        if (IS_ERR(fte)) {
                err = PTR_ERR(fte);
                goto err_release_fg;
@@ -1802,8 +1803,7 @@ search_again_locked:
 
        nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD);
        up_write_ref_node(&g->node, false);
-       rule = add_rule_fg(g, spec->match_value, flow_act, dest,
-                          dest_num, fte);
+       rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte);
        up_write_ref_node(&fte->node, false);
        tree_put_node(&fte->node, false);
        tree_put_node(&g->node, false);
index a08c3d0..c48c382 100644 (file)
@@ -170,6 +170,7 @@ struct fs_fte {
        u32                             val[MLX5_ST_SZ_DW_MATCH_PARAM];
        u32                             dests_size;
        u32                             index;
+       struct mlx5_flow_context        flow_context;
        struct mlx5_flow_act            action;
        enum fs_fte_status              status;
        struct mlx5_fc                  *counter;
index c0fb6d7..24bd991 100644 (file)
@@ -7,7 +7,6 @@
 #include <linux/mlx5/eq.h>
 #include <linux/mlx5/cq.h>
 
-#define MLX5_MAX_IRQ_NAME   (32)
 #define MLX5_EQE_SIZE       (sizeof(struct mlx5_eqe))
 
 struct mlx5_eq_tasklet {
@@ -36,8 +35,14 @@ struct mlx5_eq {
        struct mlx5_rsc_debug   *dbg;
 };
 
+struct mlx5_eq_async {
+       struct mlx5_eq          core;
+       struct notifier_block   irq_nb;
+};
+
 struct mlx5_eq_comp {
-       struct mlx5_eq          core; /* Must be first */
+       struct mlx5_eq          core;
+       struct notifier_block   irq_nb;
        struct mlx5_eq_tasklet  tasklet_ctx;
        struct list_head        list;
 };
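
With irq_nb embedded in each EQ wrapper, instead of the old "Must be first"
layout trick, an IRQ notifier can recover its EQ with container_of(). A
hedged sketch of such a handler (the function name is hypothetical):

	static int eq_comp_int(struct notifier_block *nb, unsigned long type,
			       void *data)
	{
		struct mlx5_eq_comp *eq_comp =
			container_of(nb, struct mlx5_eq_comp, irq_nb);

		/* ... process EQEs on eq_comp->core and schedule the
		 * tasklet from eq_comp->tasklet_ctx ...
		 */
		return NOTIFY_OK;
	}
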
index 998eec9..072b56f 100644 (file)
@@ -172,18 +172,28 @@ static struct mlx5_profile profile[] = {
 
 #define FW_INIT_TIMEOUT_MILI           2000
 #define FW_INIT_WAIT_MS                        2
-#define FW_PRE_INIT_TIMEOUT_MILI       10000
+#define FW_PRE_INIT_TIMEOUT_MILI       120000
+#define FW_INIT_WARN_MESSAGE_INTERVAL  20000
 
-static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili)
+static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili,
+                       u32 warn_time_mili)
 {
+       unsigned long warn = jiffies + msecs_to_jiffies(warn_time_mili);
        unsigned long end = jiffies + msecs_to_jiffies(max_wait_mili);
        int err = 0;
 
+       BUILD_BUG_ON(FW_PRE_INIT_TIMEOUT_MILI < FW_INIT_WARN_MESSAGE_INTERVAL);
+
        while (fw_initializing(dev)) {
                if (time_after(jiffies, end)) {
                        err = -EBUSY;
                        break;
                }
+               if (warn_time_mili && time_after(jiffies, warn)) {
+                       mlx5_core_warn(dev, "Waiting for FW initialization, timeout abort in %ds\n",
+                                      jiffies_to_msecs(end - warn) / 1000);
+                       warn = jiffies + msecs_to_jiffies(warn_time_mili);
+               }
                msleep(FW_INIT_WAIT_MS);
        }
 
@@ -799,10 +809,16 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
                goto err_devcom;
        }
 
+       err = mlx5_irq_table_init(dev);
+       if (err) {
+               mlx5_core_err(dev, "failed to initialize irq table\n");
+               goto err_devcom;
+       }
+
        err = mlx5_eq_table_init(dev);
        if (err) {
                mlx5_core_err(dev, "failed to initialize eq\n");
-               goto err_devcom;
+               goto err_irq_cleanup;
        }
 
        err = mlx5_events_init(dev);
@@ -840,32 +856,32 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
                goto err_rl_cleanup;
        }
 
-       err = mlx5_eswitch_init(dev);
+       err = mlx5_sriov_init(dev);
        if (err) {
-               mlx5_core_err(dev, "Failed to init eswitch %d\n", err);
+               mlx5_core_err(dev, "Failed to init sriov %d\n", err);
                goto err_mpfs_cleanup;
        }
 
-       err = mlx5_sriov_init(dev);
+       err = mlx5_eswitch_init(dev);
        if (err) {
-               mlx5_core_err(dev, "Failed to init sriov %d\n", err);
-               goto err_eswitch_cleanup;
+               mlx5_core_err(dev, "Failed to init eswitch %d\n", err);
+               goto err_sriov_cleanup;
        }
 
        err = mlx5_fpga_init(dev);
        if (err) {
                mlx5_core_err(dev, "Failed to init fpga device %d\n", err);
-               goto err_sriov_cleanup;
+               goto err_eswitch_cleanup;
        }
 
        dev->tracer = mlx5_fw_tracer_create(dev);
 
        return 0;
 
-err_sriov_cleanup:
-       mlx5_sriov_cleanup(dev);
 err_eswitch_cleanup:
        mlx5_eswitch_cleanup(dev->priv.eswitch);
+err_sriov_cleanup:
+       mlx5_sriov_cleanup(dev);
 err_mpfs_cleanup:
        mlx5_mpfs_cleanup(dev);
 err_rl_cleanup:
@@ -880,6 +896,8 @@ err_events_cleanup:
        mlx5_events_cleanup(dev);
 err_eq_cleanup:
        mlx5_eq_table_cleanup(dev);
+err_irq_cleanup:
+       mlx5_irq_table_cleanup(dev);
 err_devcom:
        mlx5_devcom_unregister_device(dev->priv.devcom);
 
@@ -890,8 +908,8 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
 {
        mlx5_fw_tracer_destroy(dev->tracer);
        mlx5_fpga_cleanup(dev);
-       mlx5_sriov_cleanup(dev);
        mlx5_eswitch_cleanup(dev->priv.eswitch);
+       mlx5_sriov_cleanup(dev);
        mlx5_mpfs_cleanup(dev);
        mlx5_cleanup_rl_table(dev);
        mlx5_geneve_destroy(dev->geneve);
@@ -903,6 +921,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
        mlx5_cq_debugfs_cleanup(dev);
        mlx5_events_cleanup(dev);
        mlx5_eq_table_cleanup(dev);
+       mlx5_irq_table_cleanup(dev);
        mlx5_devcom_unregister_device(dev->priv.devcom);
 }
 
@@ -919,7 +938,7 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot)
 
        /* wait for firmware to accept initialization segments configurations
         */
-       err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI);
+       err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI, FW_INIT_WARN_MESSAGE_INTERVAL);
        if (err) {
                mlx5_core_err(dev, "Firmware over %d MS in pre-initializing state, aborting\n",
                              FW_PRE_INIT_TIMEOUT_MILI);
@@ -932,7 +951,7 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot)
                return err;
        }
 
-       err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI);
+       err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI, 0);
        if (err) {
                mlx5_core_err(dev, "Firmware over %d MS in initializing state, aborting\n",
                              FW_INIT_TIMEOUT_MILI);
@@ -1036,6 +1055,12 @@ static int mlx5_load(struct mlx5_core_dev *dev)
        mlx5_events_start(dev);
        mlx5_pagealloc_start(dev);
 
+       err = mlx5_irq_table_create(dev);
+       if (err) {
+               mlx5_core_err(dev, "Failed to alloc IRQs\n");
+               goto err_irq_table;
+       }
+
        err = mlx5_eq_table_create(dev);
        if (err) {
                mlx5_core_err(dev, "Failed to create EQs\n");
@@ -1107,6 +1132,8 @@ err_fpga_start:
 err_fw_tracer:
        mlx5_eq_table_destroy(dev);
 err_eq_table:
+       mlx5_irq_table_destroy(dev);
+err_irq_table:
        mlx5_pagealloc_stop(dev);
        mlx5_events_stop(dev);
        mlx5_put_uars_page(dev, dev->priv.uar);
@@ -1123,6 +1150,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev)
        mlx5_fpga_device_stop(dev);
        mlx5_fw_tracer_cleanup(dev->tracer);
        mlx5_eq_table_destroy(dev);
+       mlx5_irq_table_destroy(dev);
        mlx5_pagealloc_stop(dev);
        mlx5_events_stop(dev);
        mlx5_put_uars_page(dev, dev->priv.uar);
index 29bb61a..9587697 100644 (file)
@@ -159,6 +159,19 @@ int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam,
 void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev);
 void mlx5_lag_remove(struct mlx5_core_dev *dev);
 
+int mlx5_irq_table_init(struct mlx5_core_dev *dev);
+void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev);
+int mlx5_irq_table_create(struct mlx5_core_dev *dev);
+void mlx5_irq_table_destroy(struct mlx5_core_dev *dev);
+int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx,
+                      struct notifier_block *nb);
+int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx,
+                      struct notifier_block *nb);
+struct cpumask *
+mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx);
+struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *table);
+int mlx5_irq_get_num_comp(struct mlx5_irq_table *table);
+
 int mlx5_events_init(struct mlx5_core_dev *dev);
 void mlx5_events_cleanup(struct mlx5_core_dev *dev);
 void mlx5_events_start(struct mlx5_core_dev *dev);
index ea744d8..9231b39 100644 (file)
 
 void mlx5_init_mkey_table(struct mlx5_core_dev *dev)
 {
-       struct mlx5_mkey_table *table = &dev->priv.mkey_table;
-
-       memset(table, 0, sizeof(*table));
-       rwlock_init(&table->lock);
-       INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
+       xa_init_flags(&dev->priv.mkey_table, XA_FLAGS_LOCK_IRQ);
 }
 
 void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev)
 {
+       WARN_ON(!xa_empty(&dev->priv.mkey_table));
 }
 
 int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
@@ -56,8 +53,8 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
                             mlx5_async_cbk_t callback,
                             struct mlx5_async_work *context)
 {
-       struct mlx5_mkey_table *table = &dev->priv.mkey_table;
        u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0};
+       struct xarray *mkeys = &dev->priv.mkey_table;
        u32 mkey_index;
        void *mkc;
        int err;
@@ -88,12 +85,10 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
        mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n",
                      mkey_index, key, mkey->key);
 
-       /* connect to mkey tree */
-       write_lock_irq(&table->lock);
-       err = radix_tree_insert(&table->tree, mlx5_base_mkey(mkey->key), mkey);
-       write_unlock_irq(&table->lock);
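+       /* xa_store_irq() takes the xa_lock with IRQs disabled internally;
+        * a failure comes back as an error entry that xa_err() decodes.
+        */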
+       err = xa_err(xa_store_irq(mkeys, mlx5_base_mkey(mkey->key), mkey,
+                                 GFP_KERNEL));
        if (err) {
-               mlx5_core_warn(dev, "failed radix tree insert of mkey 0x%x, %d\n",
+               mlx5_core_warn(dev, "failed xarray insert of mkey 0x%x, %d\n",
                               mlx5_base_mkey(mkey->key), err);
                mlx5_core_destroy_mkey(dev, mkey);
        }
@@ -114,17 +109,17 @@ EXPORT_SYMBOL(mlx5_core_create_mkey);
 int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev,
                           struct mlx5_core_mkey *mkey)
 {
-       struct mlx5_mkey_table *table = &dev->priv.mkey_table;
        u32 out[MLX5_ST_SZ_DW(destroy_mkey_out)] = {0};
        u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)]   = {0};
+       struct xarray *mkeys = &dev->priv.mkey_table;
        struct mlx5_core_mkey *deleted_mkey;
        unsigned long flags;
 
-       write_lock_irqsave(&table->lock, flags);
-       deleted_mkey = radix_tree_delete(&table->tree, mlx5_base_mkey(mkey->key));
-       write_unlock_irqrestore(&table->lock, flags);
+       xa_lock_irqsave(mkeys, flags);
+       deleted_mkey = __xa_erase(mkeys, mlx5_base_mkey(mkey->key));
+       xa_unlock_irqrestore(mkeys, flags);
        if (!deleted_mkey) {
-               mlx5_core_dbg(dev, "failed radix tree delete of mkey 0x%x\n",
+               mlx5_core_dbg(dev, "failed xarray delete of mkey 0x%x\n",
                              mlx5_base_mkey(mkey->key));
                return -ENOENT;
        }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
new file mode 100644 (file)
index 0000000..373981a
--- /dev/null
@@ -0,0 +1,334 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <linux/interrupt.h>
+#include <linux/notifier.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+#ifdef CONFIG_RFS_ACCEL
+#include <linux/cpu_rmap.h>
+#endif
+
+#define MLX5_MAX_IRQ_NAME (32)
+
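+/* One MSI-X vector: EQ consumers attach to @nh so several EQs can share the
+ * interrupt; @mask carries the affinity hint used for completion vectors.
+ */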
+struct mlx5_irq {
+       struct atomic_notifier_head nh;
+       cpumask_var_t mask;
+       char name[MLX5_MAX_IRQ_NAME];
+};
+
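+/* All vectors of one device: entry 0 serves the async EQs, the remaining
+ * nvec - 1 entries serve the completion EQs.
+ */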
+struct mlx5_irq_table {
+       struct mlx5_irq *irq;
+       int nvec;
+#ifdef CONFIG_RFS_ACCEL
+       struct cpu_rmap *rmap;
+#endif
+};
+
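+/* irq_table_init()/cleanup() only manage the table allocation itself;
+ * irq_table_create()/destroy() own the MSI-X vectors and their handlers.
+ */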
+int mlx5_irq_table_init(struct mlx5_core_dev *dev)
+{
+       struct mlx5_irq_table *irq_table;
+
+       irq_table = kvzalloc(sizeof(*irq_table), GFP_KERNEL);
+       if (!irq_table)
+               return -ENOMEM;
+
+       dev->priv.irq_table = irq_table;
+       return 0;
+}
+
+void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev)
+{
+       kvfree(dev->priv.irq_table);
+}
+
+int mlx5_irq_get_num_comp(struct mlx5_irq_table *table)
+{
+       return table->nvec - MLX5_IRQ_VEC_COMP_BASE;
+}
+
+static struct mlx5_irq *mlx5_irq_get(struct mlx5_core_dev *dev, int vecidx)
+{
+       struct mlx5_irq_table *irq_table = dev->priv.irq_table;
+
+       return &irq_table->irq[vecidx];
+}
+
+int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx,
+                      struct notifier_block *nb)
+{
+       struct mlx5_irq *irq;
+
+       irq = &irq_table->irq[vecidx];
+       return atomic_notifier_chain_register(&irq->nh, nb);
+}
+
+int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx,
+                      struct notifier_block *nb)
+{
+       struct mlx5_irq *irq;
+
+       irq = &irq_table->irq[vecidx];
+       return atomic_notifier_chain_unregister(&irq->nh, nb);
+}
+
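+/* Hard-IRQ handler shared by all EQs on a vector: fan the interrupt out to
+ * every notifier registered on this vector's chain.
+ */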
+static irqreturn_t mlx5_irq_int_handler(int irq, void *nh)
+{
+       atomic_notifier_call_chain(nh, 0, NULL);
+       return IRQ_HANDLED;
+}
+
+static void irq_set_name(char *name, int vecidx)
+{
+       if (vecidx == 0) {
+               snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async");
+               return;
+       }
+
+       snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d",
+                vecidx - MLX5_IRQ_VEC_COMP_BASE);
+}
+
+static int request_irqs(struct mlx5_core_dev *dev, int nvec)
+{
+       char name[MLX5_MAX_IRQ_NAME];
+       int err;
+       int i;
+
+       for (i = 0; i < nvec; i++) {
+               struct mlx5_irq *irq = mlx5_irq_get(dev, i);
+               int irqn = pci_irq_vector(dev->pdev, i);
+
+               irq_set_name(name, i);
+               ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
+               snprintf(irq->name, MLX5_MAX_IRQ_NAME,
+                        "%s@pci:%s", name, pci_name(dev->pdev));
+               err = request_irq(irqn, mlx5_irq_int_handler, 0, irq->name,
+                                 &irq->nh);
+               if (err) {
+                       mlx5_core_err(dev, "Failed to request irq\n");
+                       goto err_request_irq;
+               }
+       }
+       return 0;
+
+err_request_irq:
+       /* Unwind only the vectors that were successfully requested; vector i
+        * itself failed to be requested and must not be freed.
+        */
+       while (i--) {
+               struct mlx5_irq *irq = mlx5_irq_get(dev, i);
+               int irqn = pci_irq_vector(dev->pdev, i);
+
+               free_irq(irqn, &irq->nh);
+       }
+       return err;
+}
+
+static void irq_clear_rmap(struct mlx5_core_dev *dev)
+{
+#ifdef CONFIG_RFS_ACCEL
+       struct mlx5_irq_table *irq_table = dev->priv.irq_table;
+
+       free_irq_cpu_rmap(irq_table->rmap);
+#endif
+}
+
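+/* With aRFS (CONFIG_RFS_ACCEL), map each completion vector's IRQ into the
+ * reverse map so the stack can steer flows to the CPU serving that queue.
+ */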
+static int irq_set_rmap(struct mlx5_core_dev *mdev)
+{
+       int err = 0;
+#ifdef CONFIG_RFS_ACCEL
+       struct mlx5_irq_table *irq_table = mdev->priv.irq_table;
+       int num_affinity_vec;
+       int vecidx;
+
+       num_affinity_vec = mlx5_irq_get_num_comp(irq_table);
+       irq_table->rmap = alloc_irq_cpu_rmap(num_affinity_vec);
+       if (!irq_table->rmap) {
+               err = -ENOMEM;
+               mlx5_core_err(mdev, "Failed to allocate cpu_rmap. err %d", err);
+               goto err_out;
+       }
+
+       vecidx = MLX5_IRQ_VEC_COMP_BASE;
+       for (; vecidx < irq_table->nvec; vecidx++) {
+               err = irq_cpu_rmap_add(irq_table->rmap,
+                                      pci_irq_vector(mdev->pdev, vecidx));
+               if (err) {
+                       mlx5_core_err(mdev, "irq_cpu_rmap_add failed. err %d",
+                                     err);
+                       goto err_irq_cpu_rmap_add;
+               }
+       }
+       return 0;
+
+err_irq_cpu_rmap_add:
+       irq_clear_rmap(mdev);
+err_out:
+#endif
+       return err;
+}
+
+/* Completion IRQ vectors */
+
+static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+       int vecidx = MLX5_IRQ_VEC_COMP_BASE + i;
+       struct mlx5_irq *irq;
+       int irqn;
+
+       irq = mlx5_irq_get(mdev, vecidx);
+       irqn = pci_irq_vector(mdev->pdev, vecidx);
+       if (!zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) {
+               mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
+               return -ENOMEM;
+       }
+
+       cpumask_set_cpu(cpumask_local_spread(i, mdev->priv.numa_node),
+                       irq->mask);
+       if (IS_ENABLED(CONFIG_SMP) &&
+           irq_set_affinity_hint(irqn, irq->mask))
+               mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x",
+                              irqn);
+
+       return 0;
+}
+
+static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+       int vecidx = MLX5_IRQ_VEC_COMP_BASE + i;
+       struct mlx5_irq *irq;
+       int irqn;
+
+       irq = mlx5_irq_get(mdev, vecidx);
+       irqn = pci_irq_vector(mdev->pdev, vecidx);
+       irq_set_affinity_hint(irqn, NULL);
+       free_cpumask_var(irq->mask);
+}
+
+static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev)
+{
+       int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table);
+       int err;
+       int i;
+
+       for (i = 0; i < nvec; i++) {
+               err = set_comp_irq_affinity_hint(mdev, i);
+               if (err)
+                       goto err_out;
+       }
+
+       return 0;
+
+err_out:
+       for (i--; i >= 0; i--)
+               clear_comp_irq_affinity_hint(mdev, i);
+
+       return err;
+}
+
+static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev)
+{
+       int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table);
+       int i;
+
+       for (i = 0; i < nvec; i++)
+               clear_comp_irq_affinity_hint(mdev, i);
+}
+
+struct cpumask *
+mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx)
+{
+       return irq_table->irq[vecidx].mask;
+}
+
+#ifdef CONFIG_RFS_ACCEL
+struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *irq_table)
+{
+       return irq_table->rmap;
+}
+#endif
+
+static void unrequest_irqs(struct mlx5_core_dev *dev)
+{
+       struct mlx5_irq_table *table = dev->priv.irq_table;
+       int i;
+
+       for (i = 0; i < table->nvec; i++)
+               free_irq(pci_irq_vector(dev->pdev, i),
+                        &mlx5_irq_get(dev, i)->nh);
+}
+
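+/* Budget the vectors: one shared async vector plus up to one completion
+ * vector per online CPU and port, capped by the device's EQ limit.
+ */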
+int mlx5_irq_table_create(struct mlx5_core_dev *dev)
+{
+       struct mlx5_priv *priv = &dev->priv;
+       struct mlx5_irq_table *table = priv->irq_table;
+       int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
+                     MLX5_CAP_GEN(dev, max_num_eqs) :
+                     1 << MLX5_CAP_GEN(dev, log_max_eq);
+       int nvec;
+       int err;
+
+       nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
+              MLX5_IRQ_VEC_COMP_BASE;
+       nvec = min_t(int, nvec, num_eqs);
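+       /* Vector 0 is reserved for the async EQs, so at least one completion
+        * vector must remain on top of it.
+        */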
+       if (nvec <= MLX5_IRQ_VEC_COMP_BASE)
+               return -ENOMEM;
+
+       table->irq = kcalloc(nvec, sizeof(*table->irq), GFP_KERNEL);
+       if (!table->irq)
+               return -ENOMEM;
+
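+       /* MSI-X only; accept anything between the async vector plus one
+        * completion vector and the full request.
+        */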
+       nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_IRQ_VEC_COMP_BASE + 1,
+                                    nvec, PCI_IRQ_MSIX);
+       if (nvec < 0) {
+               err = nvec;
+               goto err_free_irq;
+       }
+
+       table->nvec = nvec;
+
+       err = irq_set_rmap(dev);
+       if (err)
+               goto err_set_rmap;
+
+       err = request_irqs(dev, nvec);
+       if (err)
+               goto err_request_irqs;
+
+       err = set_comp_irq_affinity_hints(dev);
+       if (err) {
+               mlx5_core_err(dev, "Failed to alloc affinity hint cpumask\n");
+               goto err_set_affinity;
+       }
+
+       return 0;
+
+err_set_affinity:
+       unrequest_irqs(dev);
+err_request_irqs:
+       irq_clear_rmap(dev);
+err_set_rmap:
+       pci_free_irq_vectors(dev->pdev);
+err_free_irq:
+       kfree(table->irq);
+       return err;
+}
+
+void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
+{
+       struct mlx5_irq_table *table = dev->priv.irq_table;
+       int i;
+
+       /* free_irq() requires that the affinity hint and rmap be cleared
+        * before it is called; hence the asymmetry with irq_set_rmap(),
+        * which runs after pci_alloc_irq_vectors() but before request_irqs().
+        */
+       irq_clear_rmap(dev);
+       clear_comp_irqs_affinity_hints(dev);
+       for (i = 0; i < table->nvec; i++)
+               free_irq(pci_irq_vector(dev->pdev, i),
+                        &mlx5_irq_get(dev, i)->nh);
+       pci_free_irq_vectors(dev->pdev);
+       kfree(table->irq);
+}
+
index a249b3c..2eecb83 100644 (file)
@@ -208,6 +208,27 @@ void mlx5_sriov_detach(struct mlx5_core_dev *dev)
        mlx5_device_disable_sriov(dev);
 }
 
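+/* Prefer the FW-reported host_total_vfs when this is the ECPF e-switch
+ * manager; otherwise fall back to the PCI SR-IOV capability.
+ */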
+static u16 mlx5_get_max_vfs(struct mlx5_core_dev *dev)
+{
+       u32 out[MLX5_ST_SZ_DW(query_esw_functions_out)] = {};
+       u16 host_total_vfs;
+       int err;
+
+       if (mlx5_core_is_ecpf_esw_manager(dev)) {
+               err = mlx5_esw_query_functions(dev, out, sizeof(out));
+               host_total_vfs = MLX5_GET(query_esw_functions_out, out,
+                                         host_params_context.host_total_vfs);
+
+               /* Old FW doesn't support getting total_vfs from esw func
+                * but supports getting it from pci_sriov.
+                */
+               if (!err && host_total_vfs)
+                       return host_total_vfs;
+       }
+
+       return pci_sriov_get_totalvfs(dev->pdev);
+}
+
 int mlx5_sriov_init(struct mlx5_core_dev *dev)
 {
        struct mlx5_core_sriov *sriov = &dev->priv.sriov;
@@ -218,6 +239,7 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev)
                return 0;
 
        total_vfs = pci_sriov_get_totalvfs(pdev);
+       sriov->max_vfs = mlx5_get_max_vfs(dev);
        sriov->num_vfs = pci_num_vf(pdev);
        sriov->vfs_ctx = kcalloc(total_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL);
        if (!sriov->vfs_ctx)
index 25847be..3ef6639 100644 (file)
@@ -41,7 +41,7 @@
 #include <linux/semaphore.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
-#include <linux/radix-tree.h>
+#include <linux/xarray.h>
 #include <linux/workqueue.h>
 #include <linux/mempool.h>
 #include <linux/interrupt.h>
@@ -458,13 +458,6 @@ struct mlx5_qp_table {
        struct radix_tree_root  tree;
 };
 
-struct mlx5_mkey_table {
-       /* protect radix tree
-        */
-       rwlock_t                lock;
-       struct radix_tree_root  tree;
-};
-
 struct mlx5_vf_context {
        int     enabled;
        u64     port_guid;
@@ -476,6 +469,7 @@ struct mlx5_core_sriov {
        struct mlx5_vf_context  *vfs_ctx;
        int                     num_vfs;
        int                     enabled_vfs;
+       u16                     max_vfs;
 };
 
 struct mlx5_fc_stats {
@@ -497,6 +491,7 @@ struct mlx5_eswitch;
 struct mlx5_lag;
 struct mlx5_devcom;
 struct mlx5_eq_table;
+struct mlx5_irq_table;
 
 struct mlx5_rate_limit {
        u32                     rate;
@@ -526,6 +521,8 @@ struct mlx5_core_roce {
 };
 
 struct mlx5_priv {
+       /* IRQ table is valid only for real PCI devices (PF or VF) */
+       struct mlx5_irq_table   *irq_table;
        struct mlx5_eq_table    *eq_table;
 
        /* pages stuff */
@@ -548,9 +545,7 @@ struct mlx5_priv {
        struct dentry          *cmdif_debugfs;
        /* end: qp stuff */
 
-       /* start: mkey staff */
-       struct mlx5_mkey_table  mkey_table;
-       /* end: mkey staff */
+       struct xarray           mkey_table;
 
        /* start: alloc stuff */
        /* protect buffer allocation according to numa node */
@@ -1112,13 +1107,9 @@ static inline bool mlx5_ecpf_vport_exists(struct mlx5_core_dev *dev)
        return mlx5_core_is_pf(dev) && MLX5_CAP_ESW(dev, ecpf_vport_exists);
 }
 
-#define MLX5_HOST_PF_MAX_VFS   (127u)
 static inline u16 mlx5_core_max_vfs(struct mlx5_core_dev *dev)
 {
-       if (mlx5_core_is_ecpf_esw_manager(dev))
-               return MLX5_HOST_PF_MAX_VFS;
-       else
-               return pci_sriov_get_totalvfs(dev->pdev);
+       return dev->priv.sriov.max_vfs;
 }
 
 static inline int mlx5_get_gid_table_len(u16 param)
index 00045cc..70e16dc 100644 (file)
@@ -4,17 +4,7 @@
 #ifndef MLX5_CORE_EQ_H
 #define MLX5_CORE_EQ_H
 
-enum {
-       MLX5_EQ_PAGEREQ_IDX        = 0,
-       MLX5_EQ_CMD_IDX            = 1,
-       MLX5_EQ_ASYNC_IDX          = 2,
-       /* reserved to be used by mlx5_core ulps (mlx5e/mlx5_ib) */
-       MLX5_EQ_PFAULT_IDX         = 3,
-       MLX5_EQ_MAX_ASYNC_EQS,
-       /* completion eqs vector indices start here */
-       MLX5_EQ_VEC_COMP_BASE = MLX5_EQ_MAX_ASYNC_EQS,
-};
-
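+/* Vector 0 is shared by all async EQs; completion vectors start right after. */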
+#define MLX5_IRQ_VEC_COMP_BASE 1
 #define MLX5_NUM_CMD_EQE   (32)
 #define MLX5_NUM_ASYNC_EQE (0x1000)
 #define MLX5_NUM_SPARE_EQE (0x80)
@@ -23,18 +13,19 @@ struct mlx5_eq;
 struct mlx5_core_dev;
 
 struct mlx5_eq_param {
-       u8             index;
+       u8             irq_index;
        int            nent;
        u64            mask;
-       void          *context;
-       irq_handler_t  handler;
 };
 
 struct mlx5_eq *
-mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name,
-                      struct mlx5_eq_param *param);
+mlx5_eq_create_generic(struct mlx5_core_dev *dev, struct mlx5_eq_param *param);
 int
 mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
+int mlx5_eq_enable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+                  struct notifier_block *nb);
+void mlx5_eq_disable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+                    struct notifier_block *nb);
 
 struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc);
 void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm);
index e9a55c0..091e258 100644 (file)
@@ -7,6 +7,7 @@
 #define _MLX5_ESWITCH_
 
 #include <linux/mlx5/driver.h>
+#include <net/devlink.h>
 
 #define MLX5_ESWITCH_MANAGER(mdev) MLX5_CAP_GEN(mdev, eswitch_manager)
 
@@ -62,4 +63,32 @@ u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw);
 struct mlx5_flow_handle *
 mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw,
                                    u16 vport_num, u32 sqn);
+
+#ifdef CONFIG_MLX5_ESWITCH
+enum devlink_eswitch_encap_mode
+mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev);
+
+bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw);
+u32 mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw,
+                                             u16 vport_num);
+#else  /* CONFIG_MLX5_ESWITCH */
+static inline enum devlink_eswitch_encap_mode
+mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev)
+{
+       return DEVLINK_ESWITCH_ENCAP_MODE_NONE;
+}
+
+static inline bool
+mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw)
+{
+       return false;
+}
+
+static inline u32
+mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw,
+                                         u16 vport_num)
+{
+       return 0;
+}
+#endif /* CONFIG_MLX5_ESWITCH */
 #endif
index 2ddaa97..dc7e7aa 100644 (file)
@@ -88,10 +88,21 @@ struct mlx5_flow_group;
 struct mlx5_flow_namespace;
 struct mlx5_flow_handle;
 
+enum {
+       FLOW_CONTEXT_HAS_TAG = BIT(0),
+};
+
+struct mlx5_flow_context {
+       u32 flags;
+       u32 flow_tag;
+       u32 flow_source;
+};
+
 struct mlx5_flow_spec {
        u8   match_criteria_enable;
        u32  match_criteria[MLX5_ST_SZ_DW(fte_match_param)];
        u32  match_value[MLX5_ST_SZ_DW(fte_match_param)];
+       struct mlx5_flow_context flow_context;
 };
 
 enum {
@@ -173,13 +184,11 @@ struct mlx5_fs_vlan {
 #define MLX5_FS_VLAN_DEPTH     2
 
 enum {
-       FLOW_ACT_HAS_TAG   = BIT(0),
-       FLOW_ACT_NO_APPEND = BIT(1),
+       FLOW_ACT_NO_APPEND = BIT(0),
 };
 
 struct mlx5_flow_act {
        u32 action;
-       u32 flow_tag;
        u32 reformat_id;
        u32 modify_id;
        uintptr_t esp_id;
@@ -190,7 +199,6 @@ struct mlx5_flow_act {
 
 #define MLX5_DECLARE_FLOW_ACT(name) \
        struct mlx5_flow_act name = { .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,\
-                                     .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, \
                                      .reformat_id = 0, \
                                      .modify_id = 0, \
                                      .flags =  0, }
index 6513b98..d440965 100644 (file)
@@ -528,7 +528,21 @@ struct mlx5_ifc_fte_match_set_misc2_bits {
 
        struct mlx5_ifc_fte_match_mpls_bits outer_first_mpls_over_udp;
 
-       u8         reserved_at_80[0x100];
+       u8         metadata_reg_c_7[0x20];
+
+       u8         metadata_reg_c_6[0x20];
+
+       u8         metadata_reg_c_5[0x20];
+
+       u8         metadata_reg_c_4[0x20];
+
+       u8         metadata_reg_c_3[0x20];
+
+       u8         metadata_reg_c_2[0x20];
+
+       u8         metadata_reg_c_1[0x20];
+
+       u8         metadata_reg_c_0[0x20];
 
        u8         metadata_reg_a[0x20];
 
@@ -636,8 +650,22 @@ struct mlx5_ifc_flow_table_nic_cap_bits {
        u8         reserved_at_e00[0x7200];
 };
 
+enum {
+       MLX5_FDB_TO_VPORT_REG_C_0 = 0x01,
+       MLX5_FDB_TO_VPORT_REG_C_1 = 0x02,
+       MLX5_FDB_TO_VPORT_REG_C_2 = 0x04,
+       MLX5_FDB_TO_VPORT_REG_C_3 = 0x08,
+       MLX5_FDB_TO_VPORT_REG_C_4 = 0x10,
+       MLX5_FDB_TO_VPORT_REG_C_5 = 0x20,
+       MLX5_FDB_TO_VPORT_REG_C_6 = 0x40,
+       MLX5_FDB_TO_VPORT_REG_C_7 = 0x80,
+};
+
 struct mlx5_ifc_flow_table_eswitch_cap_bits {
-       u8      reserved_at_0[0x1a];
+       u8      fdb_to_vport_reg_c_id[0x8];
+       u8      reserved_at_8[0xf];
+       u8      flow_source[0x1];
+       u8      reserved_at_18[0x2];
        u8      multi_fdb_encap[0x1];
        u8      reserved_at_1b[0x1];
        u8      fdb_multi_path_to_table[0x1];
@@ -665,7 +693,9 @@ struct mlx5_ifc_e_switch_cap_bits {
        u8         vport_svlan_insert[0x1];
        u8         vport_cvlan_insert_if_not_exist[0x1];
        u8         vport_cvlan_insert_overwrite[0x1];
-       u8         reserved_at_5[0x14];
+       u8         reserved_at_5[0x3];
+       u8         esw_uplink_ingress_acl[0x1];
+       u8         reserved_at_9[0x10];
        u8         esw_functions_changed[0x1];
        u8         reserved_at_1a[0x1];
        u8         ecpf_vport_exists[0x1];
@@ -2555,6 +2585,12 @@ enum {
        MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2 = 0x800,
 };
 
+enum {
+       MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT         = 0x0,
+       MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK            = 0x1,
+       MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT       = 0x2,
+};
+
 struct mlx5_ifc_vlan_bits {
        u8         ethtype[0x10];
        u8         prio[0x3];
@@ -2574,7 +2610,9 @@ struct mlx5_ifc_flow_context_bits {
        u8         action[0x10];
 
        u8         extended_destination[0x1];
-       u8         reserved_at_80[0x7];
+       u8         reserved_at_81[0x1];
+       u8         flow_source[0x2];
+       u8         reserved_at_84[0x4];
        u8         destination_list_size[0x18];
 
        u8         reserved_at_a0[0x8];
@@ -3099,12 +3137,14 @@ struct mlx5_ifc_hca_vport_context_bits {
 };
 
 struct mlx5_ifc_esw_vport_context_bits {
-       u8         reserved_at_0[0x3];
+       u8         fdb_to_vport_reg_c[0x1];
+       u8         reserved_at_1[0x2];
        u8         vport_svlan_strip[0x1];
        u8         vport_cvlan_strip[0x1];
        u8         vport_svlan_insert[0x1];
        u8         vport_cvlan_insert[0x2];
-       u8         reserved_at_8[0x18];
+       u8         fdb_to_vport_reg_c_id[0x8];
+       u8         reserved_at_10[0x10];
 
        u8         reserved_at_20[0x20];
 
@@ -4985,7 +5025,8 @@ struct mlx5_ifc_modify_esw_vport_context_out_bits {
 };
 
 struct mlx5_ifc_esw_vport_context_fields_select_bits {
-       u8         reserved_at_0[0x1c];
+       u8         reserved_at_0[0x1b];
+       u8         fdb_to_vport_reg_c_id[0x1];
        u8         vport_cvlan_insert[0x1];
        u8         vport_svlan_insert[0x1];
        u8         vport_cvlan_strip[0x1];
@@ -5182,6 +5223,7 @@ enum {
        MLX5_ACTION_IN_FIELD_OUT_DIPV4         = 0x16,
        MLX5_ACTION_IN_FIELD_OUT_FIRST_VID     = 0x17,
        MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT = 0x47,
+       MLX5_ACTION_IN_FIELD_METADATA_REG_C_0  = 0x51,
 };
 
 struct mlx5_ifc_alloc_modify_header_context_out_bits {
@@ -9711,7 +9753,7 @@ struct mlx5_ifc_host_params_context_bits {
        u8         reserved_at_8[0x8];
        u8         host_num_of_vfs[0x10];
 
-       u8         reserved_at_20[0x10];
+       u8         host_total_vfs[0x10];
        u8         host_pci_bus[0x10];
 
        u8         reserved_at_40[0x10];
index 3ba4edb..d1f353c 100644 (file)
@@ -551,11 +551,6 @@ static inline struct mlx5_core_qp *__mlx5_qp_lookup(struct mlx5_core_dev *dev, u
        return radix_tree_lookup(&dev->priv.qp_table.tree, qpn);
 }
 
-static inline struct mlx5_core_mkey *__mlx5_mr_lookup(struct mlx5_core_dev *dev, u32 key)
-{
-       return radix_tree_lookup(&dev->priv.mkey_table.tree, key);
-}
-
 int mlx5_core_create_dct(struct mlx5_core_dev *dev,
                         struct mlx5_core_dct *qp,
                         u32 *in, int inlen,
index 6c51e86..6625ea0 100644 (file)
@@ -528,8 +528,10 @@ struct devlink_ops {
        int (*eswitch_inline_mode_get)(struct devlink *devlink, u8 *p_inline_mode);
        int (*eswitch_inline_mode_set)(struct devlink *devlink, u8 inline_mode,
                                       struct netlink_ext_ack *extack);
-       int (*eswitch_encap_mode_get)(struct devlink *devlink, u8 *p_encap_mode);
-       int (*eswitch_encap_mode_set)(struct devlink *devlink, u8 encap_mode,
+       int (*eswitch_encap_mode_get)(struct devlink *devlink,
+                                     enum devlink_eswitch_encap_mode *p_encap_mode);
+       int (*eswitch_encap_mode_set)(struct devlink *devlink,
+                                     enum devlink_eswitch_encap_mode encap_mode,
                                      struct netlink_ext_ack *extack);
        int (*info_get)(struct devlink *devlink, struct devlink_info_req *req,
                        struct netlink_ext_ack *extack);
index 4baf716..89c5337 100644 (file)
@@ -1549,7 +1549,8 @@ static int devlink_nl_eswitch_fill(struct sk_buff *msg, struct devlink *devlink,
                                   u32 seq, int flags)
 {
        const struct devlink_ops *ops = devlink->ops;
-       u8 inline_mode, encap_mode;
+       enum devlink_eswitch_encap_mode encap_mode;
+       u8 inline_mode;
        void *hdr;
        int err = 0;
        u16 mode;
@@ -1625,7 +1626,8 @@ static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb,
 {
        struct devlink *devlink = info->user_ptr[0];
        const struct devlink_ops *ops = devlink->ops;
-       u8 inline_mode, encap_mode;
+       enum devlink_eswitch_encap_mode encap_mode;
+       u8 inline_mode;
        int err = 0;
        u16 mode;