net/mlx5e: Extract tc tunnel encap/decap code to dedicated file
[linux-2.6-microblaze.git] drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index dd0bfba..cfe340e 100644
@@ -63,6 +63,8 @@
 #include "en/mapping.h"
 #include "en/tc_ct.h"
 #include "en/mod_hdr.h"
+#include "en/tc_priv.h"
+#include "en/tc_tun_encap.h"
 #include "lib/devcom.h"
 #include "lib/geneve.h"
 #include "lib/fs_chains.h"
 
 #define nic_chains(priv) ((priv)->fs.tc.chains)
 #define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)
-#define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1)
-
-enum {
-       MLX5E_TC_FLOW_FLAG_INGRESS      = MLX5E_TC_FLAG_INGRESS_BIT,
-       MLX5E_TC_FLOW_FLAG_EGRESS       = MLX5E_TC_FLAG_EGRESS_BIT,
-       MLX5E_TC_FLOW_FLAG_ESWITCH      = MLX5E_TC_FLAG_ESW_OFFLOAD_BIT,
-       MLX5E_TC_FLOW_FLAG_FT           = MLX5E_TC_FLAG_FT_OFFLOAD_BIT,
-       MLX5E_TC_FLOW_FLAG_NIC          = MLX5E_TC_FLAG_NIC_OFFLOAD_BIT,
-       MLX5E_TC_FLOW_FLAG_OFFLOADED    = MLX5E_TC_FLOW_BASE,
-       MLX5E_TC_FLOW_FLAG_HAIRPIN      = MLX5E_TC_FLOW_BASE + 1,
-       MLX5E_TC_FLOW_FLAG_HAIRPIN_RSS  = MLX5E_TC_FLOW_BASE + 2,
-       MLX5E_TC_FLOW_FLAG_SLOW         = MLX5E_TC_FLOW_BASE + 3,
-       MLX5E_TC_FLOW_FLAG_DUP          = MLX5E_TC_FLOW_BASE + 4,
-       MLX5E_TC_FLOW_FLAG_NOT_READY    = MLX5E_TC_FLOW_BASE + 5,
-       MLX5E_TC_FLOW_FLAG_DELETED      = MLX5E_TC_FLOW_BASE + 6,
-       MLX5E_TC_FLOW_FLAG_CT           = MLX5E_TC_FLOW_BASE + 7,
-       MLX5E_TC_FLOW_FLAG_L3_TO_L2_DECAP = MLX5E_TC_FLOW_BASE + 8,
-};
-
-#define MLX5E_TC_MAX_SPLITS 1
-
-/* Helper struct for accessing a struct containing list_head array.
- * Containing struct
- *   |- Helper array
- *      [0] Helper item 0
- *          |- list_head item 0
- *          |- index (0)
- *      [1] Helper item 1
- *          |- list_head item 1
- *          |- index (1)
- * To access the containing struct from one of the list_head items:
- * 1. Get the helper item from the list_head item using
- *    helper item =
- *        container_of(list_head item, helper struct type, list_head field)
- * 2. Get the contining struct from the helper item and its index in the array:
- *    containing struct =
- *        container_of(helper item, containing struct type, helper field[index])
- */
-struct encap_flow_item {
-       struct mlx5e_encap_entry *e; /* attached encap instance */
-       struct list_head list;
-       int index;
-};
-
-struct mlx5e_tc_flow {
-       struct rhash_head       node;
-       struct mlx5e_priv       *priv;
-       u64                     cookie;
-       unsigned long           flags;
-       struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1];
-
-       /* flows sharing the same reformat object - currently mpls decap */
-       struct list_head l3_to_l2_reformat;
-       struct mlx5e_decap_entry *decap_reformat;
-
-       /* Flow can be associated with multiple encap IDs.
-        * The number of encaps is bounded by the number of supported
-        * destinations.
-        */
-       struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
-       struct mlx5e_tc_flow    *peer_flow;
-       struct mlx5e_mod_hdr_handle *mh; /* attached mod header instance */
-       struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */
-       struct list_head        hairpin; /* flows sharing the same hairpin */
-       struct list_head        peer;    /* flows with peer flow */
-       struct list_head        unready; /* flows not ready to be offloaded (e.g due to missing route) */
-       struct net_device       *orig_dev; /* netdev adding flow first */
-       int                     tmp_efi_index;
-       struct list_head        tmp_list; /* temporary flow list used by neigh update */
-       refcount_t              refcnt;
-       struct rcu_head         rcu_head;
-       struct completion       init_done;
-       int tunnel_id; /* the mapped tunnel id of this flow */
-       struct mlx5_flow_attr *attr;
-};
-
-struct mlx5e_tc_flow_parse_attr {
-       const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS];
-       struct net_device *filter_dev;
-       struct mlx5_flow_spec spec;
-       struct mlx5e_tc_mod_hdr_acts mod_hdr_acts;
-       int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS];
-       struct ethhdr eth;
-};
 
 #define MLX5E_TC_TABLE_NUM_GROUPS 4
 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18)
@@ -165,10 +83,15 @@ struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
                .moffset = 0,
                .mlen = 2,
        },
+       [VPORT_TO_REG] = {
+               .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
+               .moffset = 2,
+               .mlen = 2,
+       },
        [TUNNEL_TO_REG] = {
                .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,
                .moffset = 1,
-               .mlen = 3,
+               .mlen = ((ESW_TUN_OPTS_BITS + ESW_TUN_ID_BITS) / 8),
                .soffset = MLX5_BYTE_OFF(fte_match_param,
                                         misc_parameters_2.metadata_reg_c_1),
        },
@@ -190,6 +113,14 @@ struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
        [NIC_ZONE_RESTORE_TO_REG] = nic_zone_restore_to_reg_ct,
 };
 
+/* To avoid a false lockdep dependency warning, give tc_ht's mutex a lock
+ * class different from that of the ht used by the flow steering core:
+ * deleting the last flow from a group and then deleting the group reaches
+ * del_sw_flow_group(), which calls rhashtable_destroy() on fg->ftes_hash
+ * and takes that ht->mutex, a different mutex than the ht->mutex here.
+ */
+static struct lock_class_key tc_ht_lock_key;
+
 static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow);
 
 void
@@ -355,15 +286,14 @@ struct mlx5e_hairpin_entry {
 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
                              struct mlx5e_tc_flow *flow);
 
-static struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
+struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
 {
        if (!flow || !refcount_inc_not_zero(&flow->refcnt))
                return ERR_PTR(-EINVAL);
        return flow;
 }
 
-static void mlx5e_flow_put(struct mlx5e_priv *priv,
-                          struct mlx5e_tc_flow *flow)
+void mlx5e_flow_put(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
 {
        if (refcount_dec_and_test(&flow->refcnt)) {
                mlx5e_tc_del_flow(priv, flow);
@@ -371,48 +301,6 @@ static void mlx5e_flow_put(struct mlx5e_priv *priv,
        }
 }
 
-static void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag)
-{
-       /* Complete all memory stores before setting bit. */
-       smp_mb__before_atomic();
-       set_bit(flag, &flow->flags);
-}
-
-#define flow_flag_set(flow, flag) __flow_flag_set(flow, MLX5E_TC_FLOW_FLAG_##flag)
-
-static bool __flow_flag_test_and_set(struct mlx5e_tc_flow *flow,
-                                    unsigned long flag)
-{
-       /* test_and_set_bit() provides all necessary barriers */
-       return test_and_set_bit(flag, &flow->flags);
-}
-
-#define flow_flag_test_and_set(flow, flag)                     \
-       __flow_flag_test_and_set(flow,                          \
-                                MLX5E_TC_FLOW_FLAG_##flag)
-
-static void __flow_flag_clear(struct mlx5e_tc_flow *flow, unsigned long flag)
-{
-       /* Complete all memory stores before clearing bit. */
-       smp_mb__before_atomic();
-       clear_bit(flag, &flow->flags);
-}
-
-#define flow_flag_clear(flow, flag) __flow_flag_clear(flow, \
-                                                     MLX5E_TC_FLOW_FLAG_##flag)
-
-static bool __flow_flag_test(struct mlx5e_tc_flow *flow, unsigned long flag)
-{
-       bool ret = test_bit(flag, &flow->flags);
-
-       /* Read fields of flow structure only after checking flags. */
-       smp_mb__after_atomic();
-       return ret;
-}
-
-#define flow_flag_test(flow, flag) __flow_flag_test(flow, \
-                                                   MLX5E_TC_FLOW_FLAG_##flag)
-
 bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
 {
        return flow_flag_test(flow, ESWITCH);
@@ -423,7 +311,7 @@ static bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow)
        return flow_flag_test(flow, FT);
 }
 
-static bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
+bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
 {
        return flow_flag_test(flow, OFFLOADED);
 }
@@ -1138,23 +1026,7 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
        kfree(flow->attr);
 }
 
-static void mlx5e_detach_encap(struct mlx5e_priv *priv,
-                              struct mlx5e_tc_flow *flow, int out_index);
-
-static int mlx5e_attach_encap(struct mlx5e_priv *priv,
-                             struct mlx5e_tc_flow *flow,
-                             struct net_device *mirred_dev,
-                             int out_index,
-                             struct netlink_ext_ack *extack,
-                             struct net_device **encap_dev,
-                             bool *encap_valid);
-static int mlx5e_attach_decap(struct mlx5e_priv *priv,
-                             struct mlx5e_tc_flow *flow,
-                             struct netlink_ext_ack *extack);
-static void mlx5e_detach_decap(struct mlx5e_priv *priv,
-                              struct mlx5e_tc_flow *flow);
-
-static struct mlx5_flow_handle *
+struct mlx5_flow_handle *
 mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
                           struct mlx5e_tc_flow *flow,
                           struct mlx5_flow_spec *spec,
@@ -1189,10 +1061,9 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
        return rule;
 }
 
-static void
-mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
-                            struct mlx5e_tc_flow *flow,
-                            struct mlx5_flow_attr *attr)
+void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
+                                 struct mlx5e_tc_flow *flow,
+                                 struct mlx5_flow_attr *attr)
 {
        flow_flag_clear(flow, OFFLOADED);
 
@@ -1211,7 +1082,7 @@ offload_rule_0:
        mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
 }
 
-static struct mlx5_flow_handle *
+struct mlx5_flow_handle *
 mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
                              struct mlx5e_tc_flow *flow,
                              struct mlx5_flow_spec *spec)
@@ -1237,9 +1108,8 @@ mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
        return rule;
 }
 
-static void
-mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
-                                 struct mlx5e_tc_flow *flow)
+void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
+                                      struct mlx5e_tc_flow *flow)
 {
        struct mlx5_flow_attr *slow_attr;
 
@@ -1307,6 +1177,43 @@ static void remove_unready_flow(struct mlx5e_tc_flow *flow)
        mutex_unlock(&uplink_priv->unready_flows_lock);
 }
 
+static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv);
+
+bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev)
+{
+       struct mlx5_core_dev *out_mdev, *route_mdev;
+       struct mlx5e_priv *out_priv, *route_priv;
+
+       out_priv = netdev_priv(out_dev);
+       out_mdev = out_priv->mdev;
+       route_priv = netdev_priv(route_dev);
+       route_mdev = route_priv->mdev;
+
+       if (out_mdev->coredev_type != MLX5_COREDEV_PF ||
+           route_mdev->coredev_type != MLX5_COREDEV_VF)
+               return false;
+
+       return same_hw_devs(out_priv, route_priv);
+}
+
+int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport)
+{
+       struct mlx5e_priv *out_priv, *route_priv;
+       struct mlx5_core_dev *route_mdev;
+       struct mlx5_eswitch *esw;
+       u16 vhca_id;
+       int err;
+
+       out_priv = netdev_priv(out_dev);
+       esw = out_priv->mdev->priv.eswitch;
+       route_priv = netdev_priv(route_dev);
+       route_mdev = route_priv->mdev;
+
+       vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id);
+       err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
+       return err;
+}
+
 static int
 mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
                      struct mlx5e_tc_flow *flow,
@@ -1325,12 +1232,6 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
        int err = 0;
        int out_index;
 
-       if (!mlx5_chains_prios_supported(esw_chains(esw)) && attr->prio != 1) {
-               NL_SET_ERR_MSG_MOD(extack,
-                                  "E-switch priorities unsupported, upgrade FW");
-               return -EOPNOTSUPP;
-       }
-
        /* We check chain range only for tc flows.
         * For ft flows, we checked attr->chain was originally 0 and set it to
         * FDB_FT_CHAIN which is outside tc range.
@@ -1459,6 +1360,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
                        kfree(attr->parse_attr->tun_info[out_index]);
                }
        kvfree(attr->parse_attr);
+       kvfree(attr->esw_attr->rx_tun_attr);
 
        mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr);
 
@@ -1474,139 +1376,11 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
        kfree(flow->attr);
 }
 
-void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
-                             struct mlx5e_encap_entry *e,
-                             struct list_head *flow_list)
-{
-       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-       struct mlx5_esw_flow_attr *esw_attr;
-       struct mlx5_flow_handle *rule;
-       struct mlx5_flow_attr *attr;
-       struct mlx5_flow_spec *spec;
-       struct mlx5e_tc_flow *flow;
-       int err;
-
-       e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
-                                                    e->reformat_type,
-                                                    e->encap_size, e->encap_header,
-                                                    MLX5_FLOW_NAMESPACE_FDB);
-       if (IS_ERR(e->pkt_reformat)) {
-               mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
-                              PTR_ERR(e->pkt_reformat));
-               return;
-       }
-       e->flags |= MLX5_ENCAP_ENTRY_VALID;
-       mlx5e_rep_queue_neigh_stats_work(priv);
-
-       list_for_each_entry(flow, flow_list, tmp_list) {
-               bool all_flow_encaps_valid = true;
-               int i;
-
-               if (!mlx5e_is_offloaded_flow(flow))
-                       continue;
-               attr = flow->attr;
-               esw_attr = attr->esw_attr;
-               spec = &attr->parse_attr->spec;
-
-               esw_attr->dests[flow->tmp_efi_index].pkt_reformat = e->pkt_reformat;
-               esw_attr->dests[flow->tmp_efi_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
-               /* Flow can be associated with multiple encap entries.
-                * Before offloading the flow verify that all of them have
-                * a valid neighbour.
-                */
-               for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
-                       if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
-                               continue;
-                       if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
-                               all_flow_encaps_valid = false;
-                               break;
-                       }
-               }
-               /* Do not offload flows with unresolved neighbors */
-               if (!all_flow_encaps_valid)
-                       continue;
-               /* update from slow path rule to encap rule */
-               rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
-               if (IS_ERR(rule)) {
-                       err = PTR_ERR(rule);
-                       mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
-                                      err);
-                       continue;
-               }
-
-               mlx5e_tc_unoffload_from_slow_path(esw, flow);
-               flow->rule[0] = rule;
-               /* was unset when slow path rule removed */
-               flow_flag_set(flow, OFFLOADED);
-       }
-}
-
-void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
-                             struct mlx5e_encap_entry *e,
-                             struct list_head *flow_list)
-{
-       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-       struct mlx5_esw_flow_attr *esw_attr;
-       struct mlx5_flow_handle *rule;
-       struct mlx5_flow_attr *attr;
-       struct mlx5_flow_spec *spec;
-       struct mlx5e_tc_flow *flow;
-       int err;
-
-       list_for_each_entry(flow, flow_list, tmp_list) {
-               if (!mlx5e_is_offloaded_flow(flow))
-                       continue;
-               attr = flow->attr;
-               esw_attr = attr->esw_attr;
-               spec = &attr->parse_attr->spec;
-
-               /* update from encap rule to slow path rule */
-               rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
-               /* mark the flow's encap dest as non-valid */
-               esw_attr->dests[flow->tmp_efi_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
-
-               if (IS_ERR(rule)) {
-                       err = PTR_ERR(rule);
-                       mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
-                                      err);
-                       continue;
-               }
-
-               mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
-               flow->rule[0] = rule;
-               /* was unset when fast path rule removed */
-               flow_flag_set(flow, OFFLOADED);
-       }
-
-       /* we know that the encap is valid */
-       e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
-       mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
-}
-
-static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
+struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
 {
        return flow->attr->counter;
 }
 
-/* Takes reference to all flows attached to encap and adds the flows to
- * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
- */
-void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
-{
-       struct encap_flow_item *efi;
-       struct mlx5e_tc_flow *flow;
-
-       list_for_each_entry(efi, &e->flows, list) {
-               flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
-               if (IS_ERR(mlx5e_flow_get(flow)))
-                       continue;
-               wait_for_completion(&flow->init_done);
-
-               flow->tmp_efi_index = efi->index;
-               list_add(&flow->tmp_list, flow_list);
-       }
-}
-
 /* Iterate over tmp_list of flows attached to flow_list head. */
 void mlx5e_put_encap_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list)
 {
@@ -1616,222 +1390,6 @@ void mlx5e_put_encap_flow_list(struct mlx5e_priv *priv, struct list_head *flow_l
                mlx5e_flow_put(priv, flow);
 }
 
-static struct mlx5e_encap_entry *
-mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
-                          struct mlx5e_encap_entry *e)
-{
-       struct mlx5e_encap_entry *next = NULL;
-
-retry:
-       rcu_read_lock();
-
-       /* find encap with non-zero reference counter value */
-       for (next = e ?
-                    list_next_or_null_rcu(&nhe->encap_list,
-                                          &e->encap_list,
-                                          struct mlx5e_encap_entry,
-                                          encap_list) :
-                    list_first_or_null_rcu(&nhe->encap_list,
-                                           struct mlx5e_encap_entry,
-                                           encap_list);
-            next;
-            next = list_next_or_null_rcu(&nhe->encap_list,
-                                         &next->encap_list,
-                                         struct mlx5e_encap_entry,
-                                         encap_list))
-               if (mlx5e_encap_take(next))
-                       break;
-
-       rcu_read_unlock();
-
-       /* release starting encap */
-       if (e)
-               mlx5e_encap_put(netdev_priv(e->out_dev), e);
-       if (!next)
-               return next;
-
-       /* wait for encap to be fully initialized */
-       wait_for_completion(&next->res_ready);
-       /* continue searching if encap entry is not in valid state after completion */
-       if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) {
-               e = next;
-               goto retry;
-       }
-
-       return next;
-}
-
-void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
-{
-       struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
-       struct mlx5e_encap_entry *e = NULL;
-       struct mlx5e_tc_flow *flow;
-       struct mlx5_fc *counter;
-       struct neigh_table *tbl;
-       bool neigh_used = false;
-       struct neighbour *n;
-       u64 lastuse;
-
-       if (m_neigh->family == AF_INET)
-               tbl = &arp_tbl;
-#if IS_ENABLED(CONFIG_IPV6)
-       else if (m_neigh->family == AF_INET6)
-               tbl = ipv6_stub->nd_tbl;
-#endif
-       else
-               return;
-
-       /* mlx5e_get_next_valid_encap() releases previous encap before returning
-        * next one.
-        */
-       while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
-               struct mlx5e_priv *priv = netdev_priv(e->out_dev);
-               struct encap_flow_item *efi, *tmp;
-               struct mlx5_eswitch *esw;
-               LIST_HEAD(flow_list);
-
-               esw = priv->mdev->priv.eswitch;
-               mutex_lock(&esw->offloads.encap_tbl_lock);
-               list_for_each_entry_safe(efi, tmp, &e->flows, list) {
-                       flow = container_of(efi, struct mlx5e_tc_flow,
-                                           encaps[efi->index]);
-                       if (IS_ERR(mlx5e_flow_get(flow)))
-                               continue;
-                       list_add(&flow->tmp_list, &flow_list);
-
-                       if (mlx5e_is_offloaded_flow(flow)) {
-                               counter = mlx5e_tc_get_counter(flow);
-                               lastuse = mlx5_fc_query_lastuse(counter);
-                               if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
-                                       neigh_used = true;
-                                       break;
-                               }
-                       }
-               }
-               mutex_unlock(&esw->offloads.encap_tbl_lock);
-
-               mlx5e_put_encap_flow_list(priv, &flow_list);
-               if (neigh_used) {
-                       /* release current encap before breaking the loop */
-                       mlx5e_encap_put(priv, e);
-                       break;
-               }
-       }
-
-       trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);
-
-       if (neigh_used) {
-               nhe->reported_lastuse = jiffies;
-
-               /* find the relevant neigh according to the cached device and
-                * dst ip pair
-                */
-               n = neigh_lookup(tbl, &m_neigh->dst_ip, m_neigh->dev);
-               if (!n)
-                       return;
-
-               neigh_event_send(n, NULL);
-               neigh_release(n);
-       }
-}
-
-static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
-{
-       WARN_ON(!list_empty(&e->flows));
-
-       if (e->compl_result > 0) {
-               mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
-
-               if (e->flags & MLX5_ENCAP_ENTRY_VALID)
-                       mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
-       }
-
-       kfree(e->tun_info);
-       kfree(e->encap_header);
-       kfree_rcu(e, rcu);
-}
-
-static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
-                               struct mlx5e_decap_entry *d)
-{
-       WARN_ON(!list_empty(&d->flows));
-
-       if (!d->compl_result)
-               mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);
-
-       kfree_rcu(d, rcu);
-}
-
-void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
-{
-       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-
-       if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
-               return;
-       hash_del_rcu(&e->encap_hlist);
-       mutex_unlock(&esw->offloads.encap_tbl_lock);
-
-       mlx5e_encap_dealloc(priv, e);
-}
-
-static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
-{
-       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-
-       if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
-               return;
-       hash_del_rcu(&d->hlist);
-       mutex_unlock(&esw->offloads.decap_tbl_lock);
-
-       mlx5e_decap_dealloc(priv, d);
-}
-
-static void mlx5e_detach_encap(struct mlx5e_priv *priv,
-                              struct mlx5e_tc_flow *flow, int out_index)
-{
-       struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
-       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-
-       /* flow wasn't fully initialized */
-       if (!e)
-               return;
-
-       mutex_lock(&esw->offloads.encap_tbl_lock);
-       list_del(&flow->encaps[out_index].list);
-       flow->encaps[out_index].e = NULL;
-       if (!refcount_dec_and_test(&e->refcnt)) {
-               mutex_unlock(&esw->offloads.encap_tbl_lock);
-               return;
-       }
-       hash_del_rcu(&e->encap_hlist);
-       mutex_unlock(&esw->offloads.encap_tbl_lock);
-
-       mlx5e_encap_dealloc(priv, e);
-}
-
-static void mlx5e_detach_decap(struct mlx5e_priv *priv,
-                              struct mlx5e_tc_flow *flow)
-{
-       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-       struct mlx5e_decap_entry *d = flow->decap_reformat;
-
-       if (!d)
-               return;
-
-       mutex_lock(&esw->offloads.decap_tbl_lock);
-       list_del(&flow->l3_to_l2_reformat);
-       flow->decap_reformat = NULL;
-
-       if (!refcount_dec_and_test(&d->refcnt)) {
-               mutex_unlock(&esw->offloads.decap_tbl_lock);
-               return;
-       }
-       hash_del_rcu(&d->hlist);
-       mutex_unlock(&esw->offloads.decap_tbl_lock);
-
-       mlx5e_decap_dealloc(priv, d);
-}
-
 static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
 {
        struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
@@ -2089,6 +1647,29 @@ void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev,
        }
 }
 
+u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer)
+{
+       void *headers_v;
+       u16 ethertype;
+       u8 ip_version;
+
+       if (outer)
+               headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
+       else
+               headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, inner_headers);
+
+       ip_version = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_version);
+       /* Fall back to deriving the IP version from the ethertype match */
+       if (!ip_version) {
+               ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
+               if (ethertype == ETH_P_IP || ethertype == ETH_P_ARP)
+                       ip_version = 4;
+               else if (ethertype == ETH_P_IPV6)
+                       ip_version = 6;
+       }
+       return ip_version;
+}
+
 static int parse_tunnel_attr(struct mlx5e_priv *priv,
                             struct mlx5e_tc_flow *flow,
                             struct mlx5_flow_spec *spec,
@@ -2097,6 +1678,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
                             u8 *match_level,
                             bool *match_inner)
 {
+       struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev);
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct netlink_ext_ack *extack = f->common.extack;
        bool needs_mapping, sets_mapping;
@@ -2134,6 +1716,31 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
                 */
                if (!netif_is_bareudp(filter_dev))
                        flow->attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
+               err = mlx5e_tc_set_attr_rx_tun(flow, spec);
+               if (err)
+                       return err;
+       } else if (tunnel && tunnel->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) {
+               struct mlx5_flow_spec *tmp_spec;
+
+               tmp_spec = kvzalloc(sizeof(*tmp_spec), GFP_KERNEL);
+               if (!tmp_spec) {
+                       NL_SET_ERR_MSG_MOD(extack, "Failed to allocate memory for vxlan tmp spec");
+                       netdev_warn(priv->netdev, "Failed to allocate memory for vxlan tmp spec");
+                       return -ENOMEM;
+               }
+               memcpy(tmp_spec, spec, sizeof(*tmp_spec));
+
+               err = mlx5e_tc_tun_parse(filter_dev, priv, tmp_spec, f, match_level);
+               if (err) {
+                       kvfree(tmp_spec);
+                       NL_SET_ERR_MSG_MOD(extack, "Failed to parse tunnel attributes");
+                       netdev_warn(priv->netdev, "Failed to parse tunnel attributes");
+                       return err;
+               }
+               err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec);
+               kvfree(tmp_spec);
+               if (err)
+                       return err;
        }
 
        if (!needs_mapping && !sets_mapping)
@@ -3582,35 +3189,6 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv,
        return 0;
 }
 
-struct encap_key {
-       const struct ip_tunnel_key *ip_tun_key;
-       struct mlx5e_tc_tunnel *tc_tunnel;
-};
-
-static inline int cmp_encap_info(struct encap_key *a,
-                                struct encap_key *b)
-{
-       return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) ||
-              a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type;
-}
-
-static inline int cmp_decap_info(struct mlx5e_decap_key *a,
-                                struct mlx5e_decap_key *b)
-{
-       return memcmp(&a->key, &b->key, sizeof(b->key));
-}
-
-static inline int hash_encap_info(struct encap_key *key)
-{
-       return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
-                    key->tc_tunnel->tunnel_type);
-}
-
-static inline int hash_decap_info(struct mlx5e_decap_key *key)
-{
-       return jhash(&key->key, sizeof(key->key), 0);
-}
-
 static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv,
                                  struct net_device *peer_netdev)
 {
@@ -3624,277 +3202,6 @@ static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv,
                same_hw_devs(priv, peer_priv));
 }
 
-bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
-{
-       return refcount_inc_not_zero(&e->refcnt);
-}
-
-static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
-{
-       return refcount_inc_not_zero(&e->refcnt);
-}
-
-static struct mlx5e_encap_entry *
-mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key,
-               uintptr_t hash_key)
-{
-       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-       struct mlx5e_encap_entry *e;
-       struct encap_key e_key;
-
-       hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
-                                  encap_hlist, hash_key) {
-               e_key.ip_tun_key = &e->tun_info->key;
-               e_key.tc_tunnel = e->tunnel;
-               if (!cmp_encap_info(&e_key, key) &&
-                   mlx5e_encap_take(e))
-                       return e;
-       }
-
-       return NULL;
-}
-
-static struct mlx5e_decap_entry *
-mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
-               uintptr_t hash_key)
-{
-       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-       struct mlx5e_decap_key r_key;
-       struct mlx5e_decap_entry *e;
-
-       hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
-                                  hlist, hash_key) {
-               r_key = e->key;
-               if (!cmp_decap_info(&r_key, key) &&
-                   mlx5e_decap_take(e))
-                       return e;
-       }
-       return NULL;
-}
-
-static struct ip_tunnel_info *dup_tun_info(const struct ip_tunnel_info *tun_info)
-{
-       size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
-
-       return kmemdup(tun_info, tun_size, GFP_KERNEL);
-}
-
-static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
-                                     struct mlx5e_tc_flow *flow,
-                                     int out_index,
-                                     struct mlx5e_encap_entry *e,
-                                     struct netlink_ext_ack *extack)
-{
-       int i;
-
-       for (i = 0; i < out_index; i++) {
-               if (flow->encaps[i].e != e)
-                       continue;
-               NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
-               netdev_err(priv->netdev, "can't duplicate encap action\n");
-               return true;
-       }
-
-       return false;
-}
-
-static int mlx5e_attach_encap(struct mlx5e_priv *priv,
-                             struct mlx5e_tc_flow *flow,
-                             struct net_device *mirred_dev,
-                             int out_index,
-                             struct netlink_ext_ack *extack,
-                             struct net_device **encap_dev,
-                             bool *encap_valid)
-{
-       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-       struct mlx5e_tc_flow_parse_attr *parse_attr;
-       struct mlx5_flow_attr *attr = flow->attr;
-       const struct ip_tunnel_info *tun_info;
-       struct encap_key key;
-       struct mlx5e_encap_entry *e;
-       unsigned short family;
-       uintptr_t hash_key;
-       int err = 0;
-
-       parse_attr = attr->parse_attr;
-       tun_info = parse_attr->tun_info[out_index];
-       family = ip_tunnel_info_af(tun_info);
-       key.ip_tun_key = &tun_info->key;
-       key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
-       if (!key.tc_tunnel) {
-               NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
-               return -EOPNOTSUPP;
-       }
-
-       hash_key = hash_encap_info(&key);
-
-       mutex_lock(&esw->offloads.encap_tbl_lock);
-       e = mlx5e_encap_get(priv, &key, hash_key);
-
-       /* must verify if encap is valid or not */
-       if (e) {
-               /* Check that entry was not already attached to this flow */
-               if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
-                       err = -EOPNOTSUPP;
-                       goto out_err;
-               }
-
-               mutex_unlock(&esw->offloads.encap_tbl_lock);
-               wait_for_completion(&e->res_ready);
-
-               /* Protect against concurrent neigh update. */
-               mutex_lock(&esw->offloads.encap_tbl_lock);
-               if (e->compl_result < 0) {
-                       err = -EREMOTEIO;
-                       goto out_err;
-               }
-               goto attach_flow;
-       }
-
-       e = kzalloc(sizeof(*e), GFP_KERNEL);
-       if (!e) {
-               err = -ENOMEM;
-               goto out_err;
-       }
-
-       refcount_set(&e->refcnt, 1);
-       init_completion(&e->res_ready);
-
-       tun_info = dup_tun_info(tun_info);
-       if (!tun_info) {
-               err = -ENOMEM;
-               goto out_err_init;
-       }
-       e->tun_info = tun_info;
-       err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
-       if (err)
-               goto out_err_init;
-
-       INIT_LIST_HEAD(&e->flows);
-       hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
-       mutex_unlock(&esw->offloads.encap_tbl_lock);
-
-       if (family == AF_INET)
-               err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
-       else if (family == AF_INET6)
-               err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
-
-       /* Protect against concurrent neigh update. */
-       mutex_lock(&esw->offloads.encap_tbl_lock);
-       complete_all(&e->res_ready);
-       if (err) {
-               e->compl_result = err;
-               goto out_err;
-       }
-       e->compl_result = 1;
-
-attach_flow:
-       flow->encaps[out_index].e = e;
-       list_add(&flow->encaps[out_index].list, &e->flows);
-       flow->encaps[out_index].index = out_index;
-       *encap_dev = e->out_dev;
-       if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
-               attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
-               attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
-               *encap_valid = true;
-       } else {
-               *encap_valid = false;
-       }
-       mutex_unlock(&esw->offloads.encap_tbl_lock);
-
-       return err;
-
-out_err:
-       mutex_unlock(&esw->offloads.encap_tbl_lock);
-       if (e)
-               mlx5e_encap_put(priv, e);
-       return err;
-
-out_err_init:
-       mutex_unlock(&esw->offloads.encap_tbl_lock);
-       kfree(tun_info);
-       kfree(e);
-       return err;
-}
-
-static int mlx5e_attach_decap(struct mlx5e_priv *priv,
-                             struct mlx5e_tc_flow *flow,
-                             struct netlink_ext_ack *extack)
-{
-       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-       struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
-       struct mlx5e_tc_flow_parse_attr *parse_attr;
-       struct mlx5e_decap_entry *d;
-       struct mlx5e_decap_key key;
-       uintptr_t hash_key;
-       int err = 0;
-
-       parse_attr = flow->attr->parse_attr;
-       if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
-               NL_SET_ERR_MSG_MOD(extack,
-                                  "encap header larger than max supported");
-               return -EOPNOTSUPP;
-       }
-
-       key.key = parse_attr->eth;
-       hash_key = hash_decap_info(&key);
-       mutex_lock(&esw->offloads.decap_tbl_lock);
-       d = mlx5e_decap_get(priv, &key, hash_key);
-       if (d) {
-               mutex_unlock(&esw->offloads.decap_tbl_lock);
-               wait_for_completion(&d->res_ready);
-               mutex_lock(&esw->offloads.decap_tbl_lock);
-               if (d->compl_result) {
-                       err = -EREMOTEIO;
-                       goto out_free;
-               }
-               goto found;
-       }
-
-       d = kzalloc(sizeof(*d), GFP_KERNEL);
-       if (!d) {
-               err = -ENOMEM;
-               goto out_err;
-       }
-
-       d->key = key;
-       refcount_set(&d->refcnt, 1);
-       init_completion(&d->res_ready);
-       INIT_LIST_HEAD(&d->flows);
-       hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
-       mutex_unlock(&esw->offloads.decap_tbl_lock);
-
-       d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
-                                                    MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2,
-                                                    sizeof(parse_attr->eth),
-                                                    &parse_attr->eth,
-                                                    MLX5_FLOW_NAMESPACE_FDB);
-       if (IS_ERR(d->pkt_reformat)) {
-               err = PTR_ERR(d->pkt_reformat);
-               d->compl_result = err;
-       }
-       mutex_lock(&esw->offloads.decap_tbl_lock);
-       complete_all(&d->res_ready);
-       if (err)
-               goto out_free;
-
-found:
-       flow->decap_reformat = d;
-       attr->decap_pkt_reformat = d->pkt_reformat;
-       list_add(&flow->l3_to_l2_reformat, &d->flows);
-       mutex_unlock(&esw->offloads.decap_tbl_lock);
-       return 0;
-
-out_free:
-       mutex_unlock(&esw->offloads.decap_tbl_lock);
-       mlx5e_decap_put(priv, d);
-       return err;
-
-out_err:
-       mutex_unlock(&esw->offloads.decap_tbl_lock);
-       return err;
-}
-
 static int parse_tc_vlan_action(struct mlx5e_priv *priv,
                                const struct flow_action_entry *act,
                                struct mlx5_esw_flow_attr *attr,
@@ -4247,7 +3554,8 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
                        if (encap) {
                                parse_attr->mirred_ifindex[esw_attr->out_count] =
                                        out_dev->ifindex;
-                               parse_attr->tun_info[esw_attr->out_count] = dup_tun_info(info);
+                               parse_attr->tun_info[esw_attr->out_count] =
+                                       mlx5e_dup_tun_info(info);
                                if (!parse_attr->tun_info[esw_attr->out_count])
                                        return -ENOMEM;
                                encap = false;
@@ -4384,6 +3692,15 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
                }
        }
 
+       if (decap && esw_attr->rx_tun_attr) {
+               err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr);
+               if (err)
+                       return err;
+       }
+
+       /* always set IP version for indirect table handling */
+       attr->ip_version = mlx5e_tc_get_ip_version(&parse_attr->spec, true);
+
        if (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
            action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) {
                /* For prio tag mode, replace vlan pop with rewrite vlan prio
@@ -5221,6 +4538,8 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
        if (err)
                return err;
 
+       lockdep_set_class(&tc->ht.mutex, &tc_ht_lock_key);
+
        if (MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
                attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED |
                        MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
@@ -5328,7 +4647,8 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
        }
        uplink_priv->tunnel_mapping = mapping;
 
-       mapping = mapping_create(sz_enc_opts, ENC_OPTS_BITS_MASK, true);
+       /* 0xFFF is reserved for the stack devices' slow path table mark */
+       mapping = mapping_create(sz_enc_opts, ENC_OPTS_BITS_MASK - 1, true);
        if (IS_ERR(mapping)) {
                err = PTR_ERR(mapping);
                goto err_enc_opts_mapping;
@@ -5339,6 +4659,8 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
        if (err)
                goto err_ht_init;
 
+       lockdep_set_class(&tc_ht->mutex, &tc_ht_lock_key);
+
        return err;
 
 err_ht_init:
@@ -5460,7 +4782,7 @@ bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe,
                tc_skb_ext->chain = chain;
 
                zone_restore_id = (reg_b >> REG_MAPPING_SHIFT(NIC_ZONE_RESTORE_TO_REG)) &
-                                 ZONE_RESTORE_MAX;
+                       ESW_ZONE_ID_MASK;
 
                if (!mlx5e_tc_ct_restore_flow(tc->ct, skb,
                                              zone_restore_id))