net/mlx5e: Refactor reg_c1 usage
[linux-2.6-microblaze.git] drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
1 /*
2  * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32
33 #include <net/flow_dissector.h>
34 #include <net/flow_offload.h>
35 #include <net/sch_generic.h>
36 #include <net/pkt_cls.h>
37 #include <net/tc_act/tc_gact.h>
38 #include <net/tc_act/tc_skbedit.h>
39 #include <linux/mlx5/fs.h>
40 #include <linux/mlx5/device.h>
41 #include <linux/rhashtable.h>
42 #include <linux/refcount.h>
43 #include <linux/completion.h>
44 #include <net/tc_act/tc_mirred.h>
45 #include <net/tc_act/tc_vlan.h>
46 #include <net/tc_act/tc_tunnel_key.h>
47 #include <net/tc_act/tc_pedit.h>
48 #include <net/tc_act/tc_csum.h>
49 #include <net/tc_act/tc_mpls.h>
50 #include <net/arp.h>
51 #include <net/ipv6_stubs.h>
52 #include <net/bareudp.h>
53 #include <net/bonding.h>
54 #include "en.h"
55 #include "en_rep.h"
56 #include "en/rep/tc.h"
57 #include "en/rep/neigh.h"
58 #include "en_tc.h"
59 #include "eswitch.h"
60 #include "fs_core.h"
61 #include "en/port.h"
62 #include "en/tc_tun.h"
63 #include "en/mapping.h"
64 #include "en/tc_ct.h"
65 #include "en/mod_hdr.h"
66 #include "lib/devcom.h"
67 #include "lib/geneve.h"
68 #include "lib/fs_chains.h"
69 #include "diag/en_tc_tracepoint.h"
70 #include <asm/div64.h>
71
72 #define nic_chains(priv) ((priv)->fs.tc.chains)
73 #define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)
74 #define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1)
75
76 enum {
77         MLX5E_TC_FLOW_FLAG_INGRESS      = MLX5E_TC_FLAG_INGRESS_BIT,
78         MLX5E_TC_FLOW_FLAG_EGRESS       = MLX5E_TC_FLAG_EGRESS_BIT,
79         MLX5E_TC_FLOW_FLAG_ESWITCH      = MLX5E_TC_FLAG_ESW_OFFLOAD_BIT,
80         MLX5E_TC_FLOW_FLAG_FT           = MLX5E_TC_FLAG_FT_OFFLOAD_BIT,
81         MLX5E_TC_FLOW_FLAG_NIC          = MLX5E_TC_FLAG_NIC_OFFLOAD_BIT,
82         MLX5E_TC_FLOW_FLAG_OFFLOADED    = MLX5E_TC_FLOW_BASE,
83         MLX5E_TC_FLOW_FLAG_HAIRPIN      = MLX5E_TC_FLOW_BASE + 1,
84         MLX5E_TC_FLOW_FLAG_HAIRPIN_RSS  = MLX5E_TC_FLOW_BASE + 2,
85         MLX5E_TC_FLOW_FLAG_SLOW         = MLX5E_TC_FLOW_BASE + 3,
86         MLX5E_TC_FLOW_FLAG_DUP          = MLX5E_TC_FLOW_BASE + 4,
87         MLX5E_TC_FLOW_FLAG_NOT_READY    = MLX5E_TC_FLOW_BASE + 5,
88         MLX5E_TC_FLOW_FLAG_DELETED      = MLX5E_TC_FLOW_BASE + 6,
89         MLX5E_TC_FLOW_FLAG_CT           = MLX5E_TC_FLOW_BASE + 7,
90         MLX5E_TC_FLOW_FLAG_L3_TO_L2_DECAP = MLX5E_TC_FLOW_BASE + 8,
91 };
92
93 #define MLX5E_TC_MAX_SPLITS 1
94
95 /* Helper struct for accessing a struct containing a list_head array.
96  * Containing struct
97  *   |- Helper array
98  *      [0] Helper item 0
99  *          |- list_head item 0
100  *          |- index (0)
101  *      [1] Helper item 1
102  *          |- list_head item 1
103  *          |- index (1)
104  * To access the containing struct from one of the list_head items:
105  * 1. Get the helper item from the list_head item using
106  *    helper item =
107  *        container_of(list_head item, helper struct type, list_head field)
108  * 2. Get the containing struct from the helper item and its index in the array:
109  *    containing struct =
110  *        container_of(helper item, containing struct type, helper field[index])
111  */
112 struct encap_flow_item {
113         struct mlx5e_encap_entry *e; /* attached encap instance */
114         struct list_head list;
115         int index;
116 };
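/* Illustrative sketch (added, not part of the original file): applying the
 * two container_of() steps described above to an encap_flow_item. Assuming
 * "item" points at flow->encaps[i].list of some mlx5e_tc_flow:
 *
 *	struct encap_flow_item *efi;
 *	struct mlx5e_tc_flow *flow;
 *
 *	efi  = container_of(item, struct encap_flow_item, list);
 *	flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
 *
 * This mirrors the access pattern the comment above describes.
 */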
117
118 struct mlx5e_tc_flow {
119         struct rhash_head       node;
120         struct mlx5e_priv       *priv;
121         u64                     cookie;
122         unsigned long           flags;
123         struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1];
124
125         /* flows sharing the same reformat object - currently mpls decap */
126         struct list_head l3_to_l2_reformat;
127         struct mlx5e_decap_entry *decap_reformat;
128
129         /* Flow can be associated with multiple encap IDs.
130          * The number of encaps is bounded by the number of supported
131          * destinations.
132          */
133         struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
134         struct mlx5e_tc_flow    *peer_flow;
135         struct mlx5e_mod_hdr_handle *mh; /* attached mod header instance */
136         struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */
137         struct list_head        hairpin; /* flows sharing the same hairpin */
138         struct list_head        peer;    /* flows with peer flow */
139         struct list_head        unready; /* flows not ready to be offloaded (e.g. due to a missing route) */
140         struct net_device       *orig_dev; /* netdev adding flow first */
141         int                     tmp_efi_index;
142         struct list_head        tmp_list; /* temporary flow list used by neigh update */
143         refcount_t              refcnt;
144         struct rcu_head         rcu_head;
145         struct completion       init_done;
146         int tunnel_id; /* the mapped tunnel id of this flow */
147         struct mlx5_flow_attr *attr;
148 };
149
150 struct mlx5e_tc_flow_parse_attr {
151         const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS];
152         struct net_device *filter_dev;
153         struct mlx5_flow_spec spec;
154         struct mlx5e_tc_mod_hdr_acts mod_hdr_acts;
155         int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS];
156         struct ethhdr eth;
157 };
158
159 #define MLX5E_TC_TABLE_NUM_GROUPS 4
160 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18)
161
162 struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
163         [CHAIN_TO_REG] = {
164                 .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
165                 .moffset = 0,
166                 .mlen = 2,
167         },
168         [VPORT_TO_REG] = {
169                 .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
170                 .moffset = 2,
171                 .mlen = 2,
172         },
173         [TUNNEL_TO_REG] = {
174                 .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,
175                 .moffset = 1,
176                 .mlen = ((ESW_TUN_OPTS_BITS + ESW_TUN_ID_BITS) / 8),
177                 .soffset = MLX5_BYTE_OFF(fte_match_param,
178                                          misc_parameters_2.metadata_reg_c_1),
179         },
180         [ZONE_TO_REG] = zone_to_reg_ct,
181         [ZONE_RESTORE_TO_REG] = zone_restore_to_reg_ct,
182         [CTSTATE_TO_REG] = ctstate_to_reg_ct,
183         [MARK_TO_REG] = mark_to_reg_ct,
184         [LABELS_TO_REG] = labels_to_reg_ct,
185         [FTEID_TO_REG] = fteid_to_reg_ct,
186         /* For NIC rules we store the restore metadata directly
187          * into reg_b that is passed to SW since we don't
188          * jump between steering domains.
189          */
190         [NIC_CHAIN_TO_REG] = {
191                 .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B,
192                 .moffset = 0,
193                 .mlen = 2,
194         },
195         [NIC_ZONE_RESTORE_TO_REG] = nic_zone_restore_to_reg_ct,
196 };
197
198 /* To avoid a false lock dependency warning, set the tc_ht lock class
199  * different from the lock class of the ht used by flow groups: when deleting
200  * the last flow from a group and then deleting the group, we get into
201  * del_sw_flow_group(), which calls rhashtable_destroy() on fg->ftes_hash.
202  * That takes ht->mutex, but it is a different ht->mutex than the one here.
203  */
204 static struct lock_class_key tc_ht_lock_key;
205
206 static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow);
207
208 void
209 mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec,
210                             enum mlx5e_tc_attr_to_reg type,
211                             u32 data,
212                             u32 mask)
213 {
214         int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
215         int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
216         void *headers_c = spec->match_criteria;
217         void *headers_v = spec->match_value;
218         void *fmask, *fval;
219
220         fmask = headers_c + soffset;
221         fval = headers_v + soffset;
222
223         mask = (__force u32)(cpu_to_be32(mask)) >> (32 - (match_len * 8));
224         data = (__force u32)(cpu_to_be32(data)) >> (32 - (match_len * 8));
225
226         memcpy(fmask, &mask, match_len);
227         memcpy(fval, &data, match_len);
228
229         spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
230 }
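/* Note added for clarity (not in the original file): the cpu_to_be32() +
 * shift above keeps only the low mlen*8 bits of data/mask and, in effect,
 * stores them in big-endian byte order at soffset inside the match
 * parameters, so a sub-register field of mlen bytes is matched against the
 * device's big-endian view of the metadata register.
 * mlx5e_tc_match_to_reg_get_match() below performs the inverse conversion.
 */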
231
232 void
233 mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec,
234                                 enum mlx5e_tc_attr_to_reg type,
235                                 u32 *data,
236                                 u32 *mask)
237 {
238         int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
239         int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
240         void *headers_c = spec->match_criteria;
241         void *headers_v = spec->match_value;
242         void *fmask, *fval;
243
244         fmask = headers_c + soffset;
245         fval = headers_v + soffset;
246
247         memcpy(mask, fmask, match_len);
248         memcpy(data, fval, match_len);
249
250         *mask = be32_to_cpu((__force __be32)(*mask << (32 - (match_len * 8))));
251         *data = be32_to_cpu((__force __be32)(*data << (32 - (match_len * 8))));
252 }
253
254 int
255 mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
256                           struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
257                           enum mlx5_flow_namespace_type ns,
258                           enum mlx5e_tc_attr_to_reg type,
259                           u32 data)
260 {
261         int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
262         int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
263         int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
264         char *modact;
265         int err;
266
267         err = alloc_mod_hdr_actions(mdev, ns, mod_hdr_acts);
268         if (err)
269                 return err;
270
271         modact = mod_hdr_acts->actions +
272                  (mod_hdr_acts->num_actions * MLX5_MH_ACT_SZ);
273
274         /* Firmware has a 5-bit length field and 0 means 32 bits */
275         if (mlen == 4)
276                 mlen = 0;
277
278         MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
279         MLX5_SET(set_action_in, modact, field, mfield);
280         MLX5_SET(set_action_in, modact, offset, moffset * 8);
281         MLX5_SET(set_action_in, modact, length, mlen * 8);
282         MLX5_SET(set_action_in, modact, data, data);
283         mod_hdr_acts->num_actions++;
284
285         return 0;
286 }
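/* Usage sketch (illustration only, "mdev", "mod_acts" and "chain_id" are
 * hypothetical locals): programming a chain id into reg_c_0 through a
 * modify-header action. With the CHAIN_TO_REG mapping above (moffset 0,
 * mlen 2) this emits a set_action_in on METADATA_REG_C_0 with offset 0 and
 * length 16 bits:
 *
 *	struct mlx5e_tc_mod_hdr_acts mod_acts = {};
 *	int err;
 *
 *	err = mlx5e_tc_match_to_reg_set(mdev, &mod_acts,
 *					MLX5_FLOW_NAMESPACE_FDB,
 *					CHAIN_TO_REG, chain_id);
 *	if (err)
 *		return err;
 */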
287
288 static struct mlx5_tc_ct_priv *
289 get_ct_priv(struct mlx5e_priv *priv)
290 {
291         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
292         struct mlx5_rep_uplink_priv *uplink_priv;
293         struct mlx5e_rep_priv *uplink_rpriv;
294
295         if (is_mdev_switchdev_mode(priv->mdev)) {
296                 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
297                 uplink_priv = &uplink_rpriv->uplink_priv;
298
299                 return uplink_priv->ct_priv;
300         }
301
302         return priv->fs.tc.ct;
303 }
304
305 struct mlx5_flow_handle *
306 mlx5_tc_rule_insert(struct mlx5e_priv *priv,
307                     struct mlx5_flow_spec *spec,
308                     struct mlx5_flow_attr *attr)
309 {
310         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
311
312         if (is_mdev_switchdev_mode(priv->mdev))
313                 return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
314
315         return mlx5e_add_offloaded_nic_rule(priv, spec, attr);
316 }
317
318 void
319 mlx5_tc_rule_delete(struct mlx5e_priv *priv,
320                     struct mlx5_flow_handle *rule,
321                     struct mlx5_flow_attr *attr)
322 {
323         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
324
325         if (is_mdev_switchdev_mode(priv->mdev)) {
326                 mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
327
328                 return;
329         }
330
331         mlx5e_del_offloaded_nic_rule(priv, rule, attr);
332 }
333
334 struct mlx5e_hairpin {
335         struct mlx5_hairpin *pair;
336
337         struct mlx5_core_dev *func_mdev;
338         struct mlx5e_priv *func_priv;
339         u32 tdn;
340         u32 tirn;
341
342         int num_channels;
343         struct mlx5e_rqt indir_rqt;
344         u32 indir_tirn[MLX5E_NUM_INDIR_TIRS];
345         struct mlx5e_ttc_table ttc;
346 };
347
348 struct mlx5e_hairpin_entry {
349         /* a node of a hash table which keeps all the hairpin entries */
350         struct hlist_node hairpin_hlist;
351
352         /* protects flows list */
353         spinlock_t flows_lock;
354         /* flows sharing the same hairpin */
355         struct list_head flows;
356         /* hpes that were not fully initialized when the dead peer update
357          * event function traversed them.
358          */
359         struct list_head dead_peer_wait_list;
360
361         u16 peer_vhca_id;
362         u8 prio;
363         struct mlx5e_hairpin *hp;
364         refcount_t refcnt;
365         struct completion res_ready;
366 };
367
368 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
369                               struct mlx5e_tc_flow *flow);
370
371 static struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
372 {
373         if (!flow || !refcount_inc_not_zero(&flow->refcnt))
374                 return ERR_PTR(-EINVAL);
375         return flow;
376 }
377
378 static void mlx5e_flow_put(struct mlx5e_priv *priv,
379                            struct mlx5e_tc_flow *flow)
380 {
381         if (refcount_dec_and_test(&flow->refcnt)) {
382                 mlx5e_tc_del_flow(priv, flow);
383                 kfree_rcu(flow, rcu_head);
384         }
385 }
386
387 static void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag)
388 {
389         /* Complete all memory stores before setting bit. */
390         smp_mb__before_atomic();
391         set_bit(flag, &flow->flags);
392 }
393
394 #define flow_flag_set(flow, flag) __flow_flag_set(flow, MLX5E_TC_FLOW_FLAG_##flag)
395
396 static bool __flow_flag_test_and_set(struct mlx5e_tc_flow *flow,
397                                      unsigned long flag)
398 {
399         /* test_and_set_bit() provides all necessary barriers */
400         return test_and_set_bit(flag, &flow->flags);
401 }
402
403 #define flow_flag_test_and_set(flow, flag)                      \
404         __flow_flag_test_and_set(flow,                          \
405                                  MLX5E_TC_FLOW_FLAG_##flag)
406
407 static void __flow_flag_clear(struct mlx5e_tc_flow *flow, unsigned long flag)
408 {
409         /* Complete all memory stores before clearing bit. */
410         smp_mb__before_atomic();
411         clear_bit(flag, &flow->flags);
412 }
413
414 #define flow_flag_clear(flow, flag) __flow_flag_clear(flow, \
415                                                       MLX5E_TC_FLOW_FLAG_##flag)
416
417 static bool __flow_flag_test(struct mlx5e_tc_flow *flow, unsigned long flag)
418 {
419         bool ret = test_bit(flag, &flow->flags);
420
421         /* Read fields of flow structure only after checking flags. */
422         smp_mb__after_atomic();
423         return ret;
424 }
425
426 #define flow_flag_test(flow, flag) __flow_flag_test(flow, \
427                                                     MLX5E_TC_FLOW_FLAG_##flag)
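/* Illustration (added, not in the original file) of how the barriers in the
 * flag helpers are meant to pair between a writer and a reader of the same
 * flow:
 *
 *	writer:	flow->rule[0] = rule;
 *		flow_flag_set(flow, OFFLOADED);
 *
 *	reader:	if (flow_flag_test(flow, OFFLOADED))
 *			use(flow->rule[0]);
 *
 * The store to rule[0] is ordered before the bit is set, and the bit test
 * is ordered before the later read of rule[0].
 */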
428
429 bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
430 {
431         return flow_flag_test(flow, ESWITCH);
432 }
433
434 static bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow)
435 {
436         return flow_flag_test(flow, FT);
437 }
438
439 static bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
440 {
441         return flow_flag_test(flow, OFFLOADED);
442 }
443
444 static int get_flow_name_space(struct mlx5e_tc_flow *flow)
445 {
446         return mlx5e_is_eswitch_flow(flow) ?
447                 MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL;
448 }
449
450 static struct mod_hdr_tbl *
451 get_mod_hdr_table(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
452 {
453         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
454
455         return get_flow_name_space(flow) == MLX5_FLOW_NAMESPACE_FDB ?
456                 &esw->offloads.mod_hdr :
457                 &priv->fs.tc.mod_hdr;
458 }
459
460 static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
461                                 struct mlx5e_tc_flow *flow,
462                                 struct mlx5e_tc_flow_parse_attr *parse_attr)
463 {
464         struct mlx5_modify_hdr *modify_hdr;
465         struct mlx5e_mod_hdr_handle *mh;
466
467         mh = mlx5e_mod_hdr_attach(priv->mdev, get_mod_hdr_table(priv, flow),
468                                   get_flow_name_space(flow),
469                                   &parse_attr->mod_hdr_acts);
470         if (IS_ERR(mh))
471                 return PTR_ERR(mh);
472
473         modify_hdr = mlx5e_mod_hdr_get(mh);
474         flow->attr->modify_hdr = modify_hdr;
475         flow->mh = mh;
476
477         return 0;
478 }
479
480 static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv,
481                                  struct mlx5e_tc_flow *flow)
482 {
483         /* flow wasn't fully initialized */
484         if (!flow->mh)
485                 return;
486
487         mlx5e_mod_hdr_detach(priv->mdev, get_mod_hdr_table(priv, flow),
488                              flow->mh);
489         flow->mh = NULL;
490 }
491
492 static
493 struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex)
494 {
495         struct net_device *netdev;
496         struct mlx5e_priv *priv;
497
498         netdev = __dev_get_by_index(net, ifindex);
499         priv = netdev_priv(netdev);
500         return priv->mdev;
501 }
502
503 static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp)
504 {
505         u32 in[MLX5_ST_SZ_DW(create_tir_in)] = {};
506         void *tirc;
507         int err;
508
509         err = mlx5_core_alloc_transport_domain(hp->func_mdev, &hp->tdn);
510         if (err)
511                 goto alloc_tdn_err;
512
513         tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
514
515         MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
516         MLX5_SET(tirc, tirc, inline_rqn, hp->pair->rqn[0]);
517         MLX5_SET(tirc, tirc, transport_domain, hp->tdn);
518
519         err = mlx5_core_create_tir(hp->func_mdev, in, &hp->tirn);
520         if (err)
521                 goto create_tir_err;
522
523         return 0;
524
525 create_tir_err:
526         mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
527 alloc_tdn_err:
528         return err;
529 }
530
531 static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp)
532 {
533         mlx5_core_destroy_tir(hp->func_mdev, hp->tirn);
534         mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
535 }
536
537 static void mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc)
538 {
539         u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE], rqn;
540         struct mlx5e_priv *priv = hp->func_priv;
541         int i, ix, sz = MLX5E_INDIR_RQT_SIZE;
542
543         mlx5e_build_default_indir_rqt(indirection_rqt, sz,
544                                       hp->num_channels);
545
546         for (i = 0; i < sz; i++) {
547                 ix = i;
548                 if (priv->rss_params.hfunc == ETH_RSS_HASH_XOR)
549                         ix = mlx5e_bits_invert(i, ilog2(sz));
550                 ix = indirection_rqt[ix];
551                 rqn = hp->pair->rqn[ix];
552                 MLX5_SET(rqtc, rqtc, rq_num[i], rqn);
553         }
554 }
555
556 static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
557 {
558         int inlen, err, sz = MLX5E_INDIR_RQT_SIZE;
559         struct mlx5e_priv *priv = hp->func_priv;
560         struct mlx5_core_dev *mdev = priv->mdev;
561         void *rqtc;
562         u32 *in;
563
564         inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
565         in = kvzalloc(inlen, GFP_KERNEL);
566         if (!in)
567                 return -ENOMEM;
568
569         rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
570
571         MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
572         MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
573
574         mlx5e_hairpin_fill_rqt_rqns(hp, rqtc);
575
576         err = mlx5_core_create_rqt(mdev, in, inlen, &hp->indir_rqt.rqtn);
577         if (!err)
578                 hp->indir_rqt.enabled = true;
579
580         kvfree(in);
581         return err;
582 }
583
584 static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
585 {
586         struct mlx5e_priv *priv = hp->func_priv;
587         u32 in[MLX5_ST_SZ_DW(create_tir_in)];
588         int tt, i, err;
589         void *tirc;
590
591         for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
592                 struct mlx5e_tirc_config ttconfig = mlx5e_tirc_get_default_config(tt);
593
594                 memset(in, 0, MLX5_ST_SZ_BYTES(create_tir_in));
595                 tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
596
597                 MLX5_SET(tirc, tirc, transport_domain, hp->tdn);
598                 MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
599                 MLX5_SET(tirc, tirc, indirect_table, hp->indir_rqt.rqtn);
600                 mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, &ttconfig, tirc, false);
601
602                 err = mlx5_core_create_tir(hp->func_mdev, in,
603                                            &hp->indir_tirn[tt]);
604                 if (err) {
605                         mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err);
606                         goto err_destroy_tirs;
607                 }
608         }
609         return 0;
610
611 err_destroy_tirs:
612         for (i = 0; i < tt; i++)
613                 mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[i]);
614         return err;
615 }
616
617 static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp)
618 {
619         int tt;
620
621         for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
622                 mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[tt]);
623 }
624
625 static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp,
626                                          struct ttc_params *ttc_params)
627 {
628         struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
629         int tt;
630
631         memset(ttc_params, 0, sizeof(*ttc_params));
632
633         ttc_params->any_tt_tirn = hp->tirn;
634
635         for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
636                 ttc_params->indir_tirn[tt] = hp->indir_tirn[tt];
637
638         ft_attr->max_fte = MLX5E_TTC_TABLE_SIZE;
639         ft_attr->level = MLX5E_TC_TTC_FT_LEVEL;
640         ft_attr->prio = MLX5E_TC_PRIO;
641 }
642
643 static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp)
644 {
645         struct mlx5e_priv *priv = hp->func_priv;
646         struct ttc_params ttc_params;
647         int err;
648
649         err = mlx5e_hairpin_create_indirect_rqt(hp);
650         if (err)
651                 return err;
652
653         err = mlx5e_hairpin_create_indirect_tirs(hp);
654         if (err)
655                 goto err_create_indirect_tirs;
656
657         mlx5e_hairpin_set_ttc_params(hp, &ttc_params);
658         err = mlx5e_create_ttc_table(priv, &ttc_params, &hp->ttc);
659         if (err)
660                 goto err_create_ttc_table;
661
662         netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n",
663                    hp->num_channels, hp->ttc.ft.t->id);
664
665         return 0;
666
667 err_create_ttc_table:
668         mlx5e_hairpin_destroy_indirect_tirs(hp);
669 err_create_indirect_tirs:
670         mlx5e_destroy_rqt(priv, &hp->indir_rqt);
671
672         return err;
673 }
674
675 static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp)
676 {
677         struct mlx5e_priv *priv = hp->func_priv;
678
679         mlx5e_destroy_ttc_table(priv, &hp->ttc);
680         mlx5e_hairpin_destroy_indirect_tirs(hp);
681         mlx5e_destroy_rqt(priv, &hp->indir_rqt);
682 }
683
684 static struct mlx5e_hairpin *
685 mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params,
686                      int peer_ifindex)
687 {
688         struct mlx5_core_dev *func_mdev, *peer_mdev;
689         struct mlx5e_hairpin *hp;
690         struct mlx5_hairpin *pair;
691         int err;
692
693         hp = kzalloc(sizeof(*hp), GFP_KERNEL);
694         if (!hp)
695                 return ERR_PTR(-ENOMEM);
696
697         func_mdev = priv->mdev;
698         peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
699
700         pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params);
701         if (IS_ERR(pair)) {
702                 err = PTR_ERR(pair);
703                 goto create_pair_err;
704         }
705         hp->pair = pair;
706         hp->func_mdev = func_mdev;
707         hp->func_priv = priv;
708         hp->num_channels = params->num_channels;
709
710         err = mlx5e_hairpin_create_transport(hp);
711         if (err)
712                 goto create_transport_err;
713
714         if (hp->num_channels > 1) {
715                 err = mlx5e_hairpin_rss_init(hp);
716                 if (err)
717                         goto rss_init_err;
718         }
719
720         return hp;
721
722 rss_init_err:
723         mlx5e_hairpin_destroy_transport(hp);
724 create_transport_err:
725         mlx5_core_hairpin_destroy(hp->pair);
726 create_pair_err:
727         kfree(hp);
728         return ERR_PTR(err);
729 }
730
731 static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp)
732 {
733         if (hp->num_channels > 1)
734                 mlx5e_hairpin_rss_cleanup(hp);
735         mlx5e_hairpin_destroy_transport(hp);
736         mlx5_core_hairpin_destroy(hp->pair);
737         kvfree(hp);
738 }
739
740 static inline u32 hash_hairpin_info(u16 peer_vhca_id, u8 prio)
741 {
742         return (peer_vhca_id << 16 | prio);
743 }
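/* Added example (not in the original file): peer_vhca_id 0x12 and prio 3
 * hash to key 0x00120003, i.e. the vhca id occupies the upper 16 bits and
 * the priority the lower bits.
 */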
744
745 static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv,
746                                                      u16 peer_vhca_id, u8 prio)
747 {
748         struct mlx5e_hairpin_entry *hpe;
749         u32 hash_key = hash_hairpin_info(peer_vhca_id, prio);
750
751         hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe,
752                                hairpin_hlist, hash_key) {
753                 if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) {
754                         refcount_inc(&hpe->refcnt);
755                         return hpe;
756                 }
757         }
758
759         return NULL;
760 }
761
762 static void mlx5e_hairpin_put(struct mlx5e_priv *priv,
763                               struct mlx5e_hairpin_entry *hpe)
764 {
765         /* no more hairpin flows for us, release the hairpin pair */
766         if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &priv->fs.tc.hairpin_tbl_lock))
767                 return;
768         hash_del(&hpe->hairpin_hlist);
769         mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
770
771         if (!IS_ERR_OR_NULL(hpe->hp)) {
772                 netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
773                            dev_name(hpe->hp->pair->peer_mdev->device));
774
775                 mlx5e_hairpin_destroy(hpe->hp);
776         }
777
778         WARN_ON(!list_empty(&hpe->flows));
779         kfree(hpe);
780 }
781
782 #define UNKNOWN_MATCH_PRIO 8
783
784 static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv,
785                                   struct mlx5_flow_spec *spec, u8 *match_prio,
786                                   struct netlink_ext_ack *extack)
787 {
788         void *headers_c, *headers_v;
789         u8 prio_val, prio_mask = 0;
790         bool vlan_present;
791
792 #ifdef CONFIG_MLX5_CORE_EN_DCB
793         if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) {
794                 NL_SET_ERR_MSG_MOD(extack,
795                                    "only PCP trust state supported for hairpin");
796                 return -EOPNOTSUPP;
797         }
798 #endif
799         headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
800         headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
801
802         vlan_present = MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag);
803         if (vlan_present) {
804                 prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
805                 prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
806         }
807
808         if (!vlan_present || !prio_mask) {
809                 prio_val = UNKNOWN_MATCH_PRIO;
810         } else if (prio_mask != 0x7) {
811                 NL_SET_ERR_MSG_MOD(extack,
812                                    "masked priority match not supported for hairpin");
813                 return -EOPNOTSUPP;
814         }
815
816         *match_prio = prio_val;
817         return 0;
818 }
819
820 static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
821                                   struct mlx5e_tc_flow *flow,
822                                   struct mlx5e_tc_flow_parse_attr *parse_attr,
823                                   struct netlink_ext_ack *extack)
824 {
825         int peer_ifindex = parse_attr->mirred_ifindex[0];
826         struct mlx5_hairpin_params params;
827         struct mlx5_core_dev *peer_mdev;
828         struct mlx5e_hairpin_entry *hpe;
829         struct mlx5e_hairpin *hp;
830         u64 link_speed64;
831         u32 link_speed;
832         u8 match_prio;
833         u16 peer_id;
834         int err;
835
836         peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
837         if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) {
838                 NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported");
839                 return -EOPNOTSUPP;
840         }
841
842         peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
843         err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio,
844                                      extack);
845         if (err)
846                 return err;
847
848         mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
849         hpe = mlx5e_hairpin_get(priv, peer_id, match_prio);
850         if (hpe) {
851                 mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
852                 wait_for_completion(&hpe->res_ready);
853
854                 if (IS_ERR(hpe->hp)) {
855                         err = -EREMOTEIO;
856                         goto out_err;
857                 }
858                 goto attach_flow;
859         }
860
861         hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
862         if (!hpe) {
863                 mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
864                 return -ENOMEM;
865         }
866
867         spin_lock_init(&hpe->flows_lock);
868         INIT_LIST_HEAD(&hpe->flows);
869         INIT_LIST_HEAD(&hpe->dead_peer_wait_list);
870         hpe->peer_vhca_id = peer_id;
871         hpe->prio = match_prio;
872         refcount_set(&hpe->refcnt, 1);
873         init_completion(&hpe->res_ready);
874
875         hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist,
876                  hash_hairpin_info(peer_id, match_prio));
877         mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
878
879         params.log_data_size = 15;
880         params.log_data_size = min_t(u8, params.log_data_size,
881                                      MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz));
882         params.log_data_size = max_t(u8, params.log_data_size,
883                                      MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz));
884
885         params.log_num_packets = params.log_data_size -
886                                  MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv->mdev);
887         params.log_num_packets = min_t(u8, params.log_num_packets,
888                                        MLX5_CAP_GEN(priv->mdev, log_max_hairpin_num_packets));
889
890         params.q_counter = priv->q_counter;
891         /* set one hairpin pair for each 50Gbps share of the link */
892         mlx5e_port_max_linkspeed(priv->mdev, &link_speed);
893         link_speed = max_t(u32, link_speed, 50000);
894         link_speed64 = link_speed;
895         do_div(link_speed64, 50000);
896         params.num_channels = link_speed64;
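/* Worked example (added for clarity): a 100Gbps link gives
 * link_speed64 = 100000 / 50000 = 2 hairpin channels, while anything at or
 * below 50Gbps is clamped to 50000 and gets a single channel.
 */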
897
898         hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
899         hpe->hp = hp;
900         complete_all(&hpe->res_ready);
901         if (IS_ERR(hp)) {
902                 err = PTR_ERR(hp);
903                 goto out_err;
904         }
905
906         netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n",
907                    hp->tirn, hp->pair->rqn[0],
908                    dev_name(hp->pair->peer_mdev->device),
909                    hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets);
910
911 attach_flow:
912         if (hpe->hp->num_channels > 1) {
913                 flow_flag_set(flow, HAIRPIN_RSS);
914                 flow->attr->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t;
915         } else {
916                 flow->attr->nic_attr->hairpin_tirn = hpe->hp->tirn;
917         }
918
919         flow->hpe = hpe;
920         spin_lock(&hpe->flows_lock);
921         list_add(&flow->hairpin, &hpe->flows);
922         spin_unlock(&hpe->flows_lock);
923
924         return 0;
925
926 out_err:
927         mlx5e_hairpin_put(priv, hpe);
928         return err;
929 }
930
931 static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
932                                    struct mlx5e_tc_flow *flow)
933 {
934         /* flow wasn't fully initialized */
935         if (!flow->hpe)
936                 return;
937
938         spin_lock(&flow->hpe->flows_lock);
939         list_del(&flow->hairpin);
940         spin_unlock(&flow->hpe->flows_lock);
941
942         mlx5e_hairpin_put(priv, flow->hpe);
943         flow->hpe = NULL;
944 }
945
946 struct mlx5_flow_handle *
947 mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv,
948                              struct mlx5_flow_spec *spec,
949                              struct mlx5_flow_attr *attr)
950 {
951         struct mlx5_flow_context *flow_context = &spec->flow_context;
952         struct mlx5_fs_chains *nic_chains = nic_chains(priv);
953         struct mlx5_nic_flow_attr *nic_attr = attr->nic_attr;
954         struct mlx5e_tc_table *tc = &priv->fs.tc;
955         struct mlx5_flow_destination dest[2] = {};
956         struct mlx5_flow_act flow_act = {
957                 .action = attr->action,
958                 .flags    = FLOW_ACT_NO_APPEND,
959         };
960         struct mlx5_flow_handle *rule;
961         struct mlx5_flow_table *ft;
962         int dest_ix = 0;
963
964         flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
965         flow_context->flow_tag = nic_attr->flow_tag;
966
967         if (attr->dest_ft) {
968                 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
969                 dest[dest_ix].ft = attr->dest_ft;
970                 dest_ix++;
971         } else if (nic_attr->hairpin_ft) {
972                 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
973                 dest[dest_ix].ft = nic_attr->hairpin_ft;
974                 dest_ix++;
975         } else if (nic_attr->hairpin_tirn) {
976                 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
977                 dest[dest_ix].tir_num = nic_attr->hairpin_tirn;
978                 dest_ix++;
979         } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
980                 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
981                 if (attr->dest_chain) {
982                         dest[dest_ix].ft = mlx5_chains_get_table(nic_chains,
983                                                                  attr->dest_chain, 1,
984                                                                  MLX5E_TC_FT_LEVEL);
985                         if (IS_ERR(dest[dest_ix].ft))
986                                 return ERR_CAST(dest[dest_ix].ft);
987                 } else {
988                         dest[dest_ix].ft = priv->fs.vlan.ft.t;
989                 }
990                 dest_ix++;
991         }
992
993         if (dest[0].type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
994             MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
995                 flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
996
997         if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
998                 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
999                 dest[dest_ix].counter_id = mlx5_fc_id(attr->counter);
1000                 dest_ix++;
1001         }
1002
1003         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
1004                 flow_act.modify_hdr = attr->modify_hdr;
1005
1006         mutex_lock(&tc->t_lock);
1007         if (IS_ERR_OR_NULL(tc->t)) {
1008                 /* Create the root table here if it doesn't exist yet */
1009                 tc->t =
1010                         mlx5_chains_get_table(nic_chains, 0, 1, MLX5E_TC_FT_LEVEL);
1011
1012                 if (IS_ERR(tc->t)) {
1013                         mutex_unlock(&tc->t_lock);
1014                         netdev_err(priv->netdev,
1015                                    "Failed to create tc offload table\n");
1016                         rule = ERR_CAST(priv->fs.tc.t);
1017                         goto err_ft_get;
1018                 }
1019         }
1020         mutex_unlock(&tc->t_lock);
1021
1022         if (attr->chain || attr->prio)
1023                 ft = mlx5_chains_get_table(nic_chains,
1024                                            attr->chain, attr->prio,
1025                                            MLX5E_TC_FT_LEVEL);
1026         else
1027                 ft = attr->ft;
1028
1029         if (IS_ERR(ft)) {
1030                 rule = ERR_CAST(ft);
1031                 goto err_ft_get;
1032         }
1033
1034         if (attr->outer_match_level != MLX5_MATCH_NONE)
1035                 spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
1036
1037         rule = mlx5_add_flow_rules(ft, spec,
1038                                    &flow_act, dest, dest_ix);
1039         if (IS_ERR(rule))
1040                 goto err_rule;
1041
1042         return rule;
1043
1044 err_rule:
1045         if (attr->chain || attr->prio)
1046                 mlx5_chains_put_table(nic_chains,
1047                                       attr->chain, attr->prio,
1048                                       MLX5E_TC_FT_LEVEL);
1049 err_ft_get:
1050         if (attr->dest_chain)
1051                 mlx5_chains_put_table(nic_chains,
1052                                       attr->dest_chain, 1,
1053                                       MLX5E_TC_FT_LEVEL);
1054
1055         return ERR_CAST(rule);
1056 }
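/* Added summary (not in the original file): the dest[] array above is filled
 * in priority order - an explicit attr->dest_ft, then a hairpin flow table or
 * TIR, then the chain/VLAN table for FWD_DEST actions - and a flow counter
 * destination is appended when ACTION_COUNT is set, so dest_ix never exceeds
 * the two-entry dest[] array.
 */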
1057
1058 static int
1059 mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
1060                       struct mlx5e_tc_flow_parse_attr *parse_attr,
1061                       struct mlx5e_tc_flow *flow,
1062                       struct netlink_ext_ack *extack)
1063 {
1064         struct mlx5_flow_attr *attr = flow->attr;
1065         struct mlx5_core_dev *dev = priv->mdev;
1066         struct mlx5_fc *counter = NULL;
1067         int err;
1068
1069         if (flow_flag_test(flow, HAIRPIN)) {
1070                 err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
1071                 if (err)
1072                         return err;
1073         }
1074
1075         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1076                 counter = mlx5_fc_create(dev, true);
1077                 if (IS_ERR(counter))
1078                         return PTR_ERR(counter);
1079
1080                 attr->counter = counter;
1081         }
1082
1083         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
1084                 err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
1085                 dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
1086                 if (err)
1087                         return err;
1088         }
1089
1090         if (flow_flag_test(flow, CT))
1091                 flow->rule[0] = mlx5_tc_ct_flow_offload(get_ct_priv(priv), flow, &parse_attr->spec,
1092                                                         attr, &parse_attr->mod_hdr_acts);
1093         else
1094                 flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec,
1095                                                              attr);
1096
1097         return PTR_ERR_OR_ZERO(flow->rule[0]);
1098 }
1099
1100 void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv,
1101                                   struct mlx5_flow_handle *rule,
1102                                   struct mlx5_flow_attr *attr)
1103 {
1104         struct mlx5_fs_chains *nic_chains = nic_chains(priv);
1105
1106         mlx5_del_flow_rules(rule);
1107
1108         if (attr->chain || attr->prio)
1109                 mlx5_chains_put_table(nic_chains, attr->chain, attr->prio,
1110                                       MLX5E_TC_FT_LEVEL);
1111
1112         if (attr->dest_chain)
1113                 mlx5_chains_put_table(nic_chains, attr->dest_chain, 1,
1114                                       MLX5E_TC_FT_LEVEL);
1115 }
1116
1117 static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
1118                                   struct mlx5e_tc_flow *flow)
1119 {
1120         struct mlx5_flow_attr *attr = flow->attr;
1121         struct mlx5e_tc_table *tc = &priv->fs.tc;
1122
1123         flow_flag_clear(flow, OFFLOADED);
1124
1125         if (flow_flag_test(flow, CT))
1126                 mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr);
1127         else if (!IS_ERR_OR_NULL(flow->rule[0]))
1128                 mlx5e_del_offloaded_nic_rule(priv, flow->rule[0], attr);
1129
1130         /* Remove root table if no rules are left to avoid
1131          * extra steering hops.
1132          */
1133         mutex_lock(&priv->fs.tc.t_lock);
1134         if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) &&
1135             !IS_ERR_OR_NULL(tc->t)) {
1136                 mlx5_chains_put_table(nic_chains(priv), 0, 1, MLX5E_TC_FT_LEVEL);
1137                 priv->fs.tc.t = NULL;
1138         }
1139         mutex_unlock(&priv->fs.tc.t_lock);
1140
1141         kvfree(attr->parse_attr);
1142
1143         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
1144                 mlx5e_detach_mod_hdr(priv, flow);
1145
1146         mlx5_fc_destroy(priv->mdev, attr->counter);
1147
1148         if (flow_flag_test(flow, HAIRPIN))
1149                 mlx5e_hairpin_flow_del(priv, flow);
1150
1151         kfree(flow->attr);
1152 }
1153
1154 static void mlx5e_detach_encap(struct mlx5e_priv *priv,
1155                                struct mlx5e_tc_flow *flow, int out_index);
1156
1157 static int mlx5e_attach_encap(struct mlx5e_priv *priv,
1158                               struct mlx5e_tc_flow *flow,
1159                               struct net_device *mirred_dev,
1160                               int out_index,
1161                               struct netlink_ext_ack *extack,
1162                               struct net_device **encap_dev,
1163                               bool *encap_valid);
1164 static int mlx5e_attach_decap(struct mlx5e_priv *priv,
1165                               struct mlx5e_tc_flow *flow,
1166                               struct netlink_ext_ack *extack);
1167 static void mlx5e_detach_decap(struct mlx5e_priv *priv,
1168                                struct mlx5e_tc_flow *flow);
1169
1170 static struct mlx5_flow_handle *
1171 mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
1172                            struct mlx5e_tc_flow *flow,
1173                            struct mlx5_flow_spec *spec,
1174                            struct mlx5_flow_attr *attr)
1175 {
1176         struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
1177         struct mlx5_flow_handle *rule;
1178
1179         if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH)
1180                 return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
1181
1182         if (flow_flag_test(flow, CT)) {
1183                 mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
1184
1185                 return mlx5_tc_ct_flow_offload(get_ct_priv(flow->priv),
1186                                                flow, spec, attr,
1187                                                mod_hdr_acts);
1188         }
1189
1190         rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
1191         if (IS_ERR(rule))
1192                 return rule;
1193
1194         if (attr->esw_attr->split_count) {
1195                 flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
1196                 if (IS_ERR(flow->rule[1])) {
1197                         mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
1198                         return flow->rule[1];
1199                 }
1200         }
1201
1202         return rule;
1203 }
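/* Added note (not in the original file): flow->rule[0] holds the primary
 * offloaded rule returned here, and when the action list is split
 * (esw_attr->split_count != 0) flow->rule[1] holds the extra forward rule,
 * which is why rule[] is sized MLX5E_TC_MAX_SPLITS + 1 with
 * MLX5E_TC_MAX_SPLITS == 1.
 */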
1204
1205 static void
1206 mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
1207                              struct mlx5e_tc_flow *flow,
1208                              struct mlx5_flow_attr *attr)
1209 {
1210         flow_flag_clear(flow, OFFLOADED);
1211
1212         if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH)
1213                 goto offload_rule_0;
1214
1215         if (flow_flag_test(flow, CT)) {
1216                 mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr);
1217                 return;
1218         }
1219
1220         if (attr->esw_attr->split_count)
1221                 mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);
1222
1223 offload_rule_0:
1224         mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
1225 }
1226
1227 static struct mlx5_flow_handle *
1228 mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
1229                               struct mlx5e_tc_flow *flow,
1230                               struct mlx5_flow_spec *spec)
1231 {
1232         struct mlx5_flow_attr *slow_attr;
1233         struct mlx5_flow_handle *rule;
1234
1235         slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
1236         if (!slow_attr)
1237                 return ERR_PTR(-ENOMEM);
1238
1239         memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
1240         slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1241         slow_attr->esw_attr->split_count = 0;
1242         slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
1243
1244         rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
1245         if (!IS_ERR(rule))
1246                 flow_flag_set(flow, SLOW);
1247
1248         kfree(slow_attr);
1249
1250         return rule;
1251 }
1252
1253 static void
1254 mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
1255                                   struct mlx5e_tc_flow *flow)
1256 {
1257         struct mlx5_flow_attr *slow_attr;
1258
1259         slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
1260         if (!slow_attr) {
1261                 mlx5_core_warn(flow->priv->mdev, "Unable to alloc attr to unoffload slow path rule\n");
1262                 return;
1263         }
1264
1265         memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
1266         slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1267         slow_attr->esw_attr->split_count = 0;
1268         slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
1269         mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
1270         flow_flag_clear(flow, SLOW);
1271         kfree(slow_attr);
1272 }
1273
1274 /* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
1275  * function.
1276  */
1277 static void unready_flow_add(struct mlx5e_tc_flow *flow,
1278                              struct list_head *unready_flows)
1279 {
1280         flow_flag_set(flow, NOT_READY);
1281         list_add_tail(&flow->unready, unready_flows);
1282 }
1283
1284 /* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
1285  * function.
1286  */
1287 static void unready_flow_del(struct mlx5e_tc_flow *flow)
1288 {
1289         list_del(&flow->unready);
1290         flow_flag_clear(flow, NOT_READY);
1291 }
1292
1293 static void add_unready_flow(struct mlx5e_tc_flow *flow)
1294 {
1295         struct mlx5_rep_uplink_priv *uplink_priv;
1296         struct mlx5e_rep_priv *rpriv;
1297         struct mlx5_eswitch *esw;
1298
1299         esw = flow->priv->mdev->priv.eswitch;
1300         rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1301         uplink_priv = &rpriv->uplink_priv;
1302
1303         mutex_lock(&uplink_priv->unready_flows_lock);
1304         unready_flow_add(flow, &uplink_priv->unready_flows);
1305         mutex_unlock(&uplink_priv->unready_flows_lock);
1306 }
1307
1308 static void remove_unready_flow(struct mlx5e_tc_flow *flow)
1309 {
1310         struct mlx5_rep_uplink_priv *uplink_priv;
1311         struct mlx5e_rep_priv *rpriv;
1312         struct mlx5_eswitch *esw;
1313
1314         esw = flow->priv->mdev->priv.eswitch;
1315         rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1316         uplink_priv = &rpriv->uplink_priv;
1317
1318         mutex_lock(&uplink_priv->unready_flows_lock);
1319         unready_flow_del(flow);
1320         mutex_unlock(&uplink_priv->unready_flows_lock);
1321 }
1322
1323 static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv);
1324
1325 bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev)
1326 {
1327         struct mlx5_core_dev *out_mdev, *route_mdev;
1328         struct mlx5e_priv *out_priv, *route_priv;
1329
1330         out_priv = netdev_priv(out_dev);
1331         out_mdev = out_priv->mdev;
1332         route_priv = netdev_priv(route_dev);
1333         route_mdev = route_priv->mdev;
1334
1335         if (out_mdev->coredev_type != MLX5_COREDEV_PF ||
1336             route_mdev->coredev_type != MLX5_COREDEV_VF)
1337                 return false;
1338
1339         return same_hw_devs(out_priv, route_priv);
1340 }
1341
1342 int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport)
1343 {
1344         struct mlx5e_priv *out_priv, *route_priv;
1345         struct mlx5_core_dev *route_mdev;
1346         struct mlx5_eswitch *esw;
1347         u16 vhca_id;
1348         int err;
1349
1350         out_priv = netdev_priv(out_dev);
1351         esw = out_priv->mdev->priv.eswitch;
1352         route_priv = netdev_priv(route_dev);
1353         route_mdev = route_priv->mdev;
1354
1355         vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id);
1356         err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
1357         return err;
1358 }
1359
1360 static int
1361 mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
1362                       struct mlx5e_tc_flow *flow,
1363                       struct netlink_ext_ack *extack)
1364 {
1365         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1366         struct net_device *out_dev, *encap_dev = NULL;
1367         struct mlx5e_tc_flow_parse_attr *parse_attr;
1368         struct mlx5_flow_attr *attr = flow->attr;
1369         struct mlx5_esw_flow_attr *esw_attr;
1370         struct mlx5_fc *counter = NULL;
1371         struct mlx5e_rep_priv *rpriv;
1372         struct mlx5e_priv *out_priv;
1373         bool encap_valid = true;
1374         u32 max_prio, max_chain;
1375         int err = 0;
1376         int out_index;
1377
1378         /* We check chain range only for tc flows.
1379          * For ft flows, we checked attr->chain was originally 0 and set it to
1380          * FDB_FT_CHAIN which is outside tc range.
1381          * See mlx5e_rep_setup_ft_cb().
1382          */
1383         max_chain = mlx5_chains_get_chain_range(esw_chains(esw));
1384         if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) {
1385                 NL_SET_ERR_MSG_MOD(extack,
1386                                    "Requested chain is out of supported range");
1387                 return -EOPNOTSUPP;
1388         }
1389
1390         max_prio = mlx5_chains_get_prio_range(esw_chains(esw));
1391         if (attr->prio > max_prio) {
1392                 NL_SET_ERR_MSG_MOD(extack,
1393                                    "Requested priority is out of supported range");
1394                 return -EOPNOTSUPP;
1395         }
1396
1397         if (flow_flag_test(flow, L3_TO_L2_DECAP)) {
1398                 err = mlx5e_attach_decap(priv, flow, extack);
1399                 if (err)
1400                         return err;
1401         }
1402
1403         parse_attr = attr->parse_attr;
1404         esw_attr = attr->esw_attr;
1405
1406         for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
1407                 int mirred_ifindex;
1408
1409                 if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
1410                         continue;
1411
1412                 mirred_ifindex = parse_attr->mirred_ifindex[out_index];
1413                 out_dev = __dev_get_by_index(dev_net(priv->netdev),
1414                                              mirred_ifindex);
1415                 err = mlx5e_attach_encap(priv, flow, out_dev, out_index,
1416                                          extack, &encap_dev, &encap_valid);
1417                 if (err)
1418                         return err;
1419
1420                 out_priv = netdev_priv(encap_dev);
1421                 rpriv = out_priv->ppriv;
1422                 esw_attr->dests[out_index].rep = rpriv->rep;
1423                 esw_attr->dests[out_index].mdev = out_priv->mdev;
1424         }
1425
1426         err = mlx5_eswitch_add_vlan_action(esw, attr);
1427         if (err)
1428                 return err;
1429
1430         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
1431             !(attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR)) {
1432                 err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
1433                 dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
1434                 if (err)
1435                         return err;
1436         }
1437
1438         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1439                 counter = mlx5_fc_create(esw_attr->counter_dev, true);
1440                 if (IS_ERR(counter))
1441                         return PTR_ERR(counter);
1442
1443                 attr->counter = counter;
1444         }
1445
1446         /* we get here if one of the following takes place:
1447          * (1) there's no error
1448          * (2) there's an encap action and we don't have a valid neigh
1449          */
1450         if (!encap_valid)
1451                 flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec);
1452         else
1453                 flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);
1454
1455         if (IS_ERR(flow->rule[0]))
1456                 return PTR_ERR(flow->rule[0]);
1457         else
1458                 flow_flag_set(flow, OFFLOADED);
1459
1460         return 0;
1461 }
1462
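/* A flow matches on geneve TLV options iff the option data in
 * misc_parameters_3 of its match value is non-zero.
 */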
1463 static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
1464 {
1465         struct mlx5_flow_spec *spec = &flow->attr->parse_attr->spec;
1466         void *headers_v = MLX5_ADDR_OF(fte_match_param,
1467                                        spec->match_value,
1468                                        misc_parameters_3);
1469         u32 geneve_tlv_opt_0_data = MLX5_GET(fte_match_set_misc3,
1470                                              headers_v,
1471                                              geneve_tlv_option_0_data);
1472
1473         return !!geneve_tlv_opt_0_data;
1474 }
1475
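/* Tear down an offloaded FDB flow: remove its fast path (or slow path)
 * rules and release every resource attached to it - geneve TLV option,
 * vlan action, encap/decap entries, mod_hdr, counter and CT match - before
 * freeing the flow attr.
 */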
1476 static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
1477                                   struct mlx5e_tc_flow *flow)
1478 {
1479         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1480         struct mlx5_flow_attr *attr = flow->attr;
1481         int out_index;
1482
1483         mlx5e_put_flow_tunnel_id(flow);
1484
1485         if (flow_flag_test(flow, NOT_READY))
1486                 remove_unready_flow(flow);
1487
1488         if (mlx5e_is_offloaded_flow(flow)) {
1489                 if (flow_flag_test(flow, SLOW))
1490                         mlx5e_tc_unoffload_from_slow_path(esw, flow);
1491                 else
1492                         mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
1493         }
1494
1495         if (mlx5_flow_has_geneve_opt(flow))
1496                 mlx5_geneve_tlv_option_del(priv->mdev->geneve);
1497
1498         mlx5_eswitch_del_vlan_action(esw, attr);
1499
1500         for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
1501                 if (attr->esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) {
1502                         mlx5e_detach_encap(priv, flow, out_index);
1503                         kfree(attr->parse_attr->tun_info[out_index]);
1504                 }
1505         kvfree(attr->parse_attr);
1506         kvfree(attr->esw_attr->rx_tun_attr);
1507
1508         mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr);
1509
1510         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
1511                 mlx5e_detach_mod_hdr(priv, flow);
1512
1513         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
1514                 mlx5_fc_destroy(attr->esw_attr->counter_dev, attr->counter);
1515
1516         if (flow_flag_test(flow, L3_TO_L2_DECAP))
1517                 mlx5e_detach_decap(priv, flow);
1518
1519         kfree(flow->attr);
1520 }
1521
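/* Called once the encap entry's neighbour is resolved: offload the cached
 * encapsulation header as a packet reformat object and move the flows
 * attached to the entry from the slow path back to the FDB fast path,
 * skipping flows whose other encap destinations are still unresolved.
 */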
1522 void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
1523                               struct mlx5e_encap_entry *e,
1524                               struct list_head *flow_list)
1525 {
1526         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1527         struct mlx5_esw_flow_attr *esw_attr;
1528         struct mlx5_flow_handle *rule;
1529         struct mlx5_flow_attr *attr;
1530         struct mlx5_flow_spec *spec;
1531         struct mlx5e_tc_flow *flow;
1532         int err;
1533
1534         e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
1535                                                      e->reformat_type,
1536                                                      e->encap_size, e->encap_header,
1537                                                      MLX5_FLOW_NAMESPACE_FDB);
1538         if (IS_ERR(e->pkt_reformat)) {
1539                 mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %ld\n",
1540                                PTR_ERR(e->pkt_reformat));
1541                 return;
1542         }
1543         e->flags |= MLX5_ENCAP_ENTRY_VALID;
1544         mlx5e_rep_queue_neigh_stats_work(priv);
1545
1546         list_for_each_entry(flow, flow_list, tmp_list) {
1547                 bool all_flow_encaps_valid = true;
1548                 int i;
1549
1550                 if (!mlx5e_is_offloaded_flow(flow))
1551                         continue;
1552                 attr = flow->attr;
1553                 esw_attr = attr->esw_attr;
1554                 spec = &attr->parse_attr->spec;
1555
1556                 esw_attr->dests[flow->tmp_efi_index].pkt_reformat = e->pkt_reformat;
1557                 esw_attr->dests[flow->tmp_efi_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
1558                 /* Flow can be associated with multiple encap entries.
1559                  * Before offloading the flow verify that all of them have
1560                  * a valid neighbour.
1561                  */
1562                 for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
1563                         if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
1564                                 continue;
1565                         if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
1566                                 all_flow_encaps_valid = false;
1567                                 break;
1568                         }
1569                 }
1570                 /* Do not offload flows with unresolved neighbors */
1571                 if (!all_flow_encaps_valid)
1572                         continue;
1573                 /* update from slow path rule to encap rule */
1574                 rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
1575                 if (IS_ERR(rule)) {
1576                         err = PTR_ERR(rule);
1577                         mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
1578                                        err);
1579                         continue;
1580                 }
1581
1582                 mlx5e_tc_unoffload_from_slow_path(esw, flow);
1583                 flow->rule[0] = rule;
1584                 /* was unset when slow path rule removed */
1585                 flow_flag_set(flow, OFFLOADED);
1586         }
1587 }
1588
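/* Called when the encap entry's neighbour becomes invalid: move the
 * attached flows to the slow path, clear the entry's VALID flag and
 * release its packet reformat object.
 */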
1589 void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
1590                               struct mlx5e_encap_entry *e,
1591                               struct list_head *flow_list)
1592 {
1593         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1594         struct mlx5_esw_flow_attr *esw_attr;
1595         struct mlx5_flow_handle *rule;
1596         struct mlx5_flow_attr *attr;
1597         struct mlx5_flow_spec *spec;
1598         struct mlx5e_tc_flow *flow;
1599         int err;
1600
1601         list_for_each_entry(flow, flow_list, tmp_list) {
1602                 if (!mlx5e_is_offloaded_flow(flow))
1603                         continue;
1604                 attr = flow->attr;
1605                 esw_attr = attr->esw_attr;
1606                 spec = &attr->parse_attr->spec;
1607
1608                 /* update from encap rule to slow path rule */
1609                 rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
1610                 /* mark the flow's encap dest as non-valid */
1611                 esw_attr->dests[flow->tmp_efi_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
1612
1613                 if (IS_ERR(rule)) {
1614                         err = PTR_ERR(rule);
1615                         mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
1616                                        err);
1617                         continue;
1618                 }
1619
1620                 mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
1621                 flow->rule[0] = rule;
1622                 /* was unset when fast path rule removed */
1623                 flow_flag_set(flow, OFFLOADED);
1624         }
1625
1626         /* the encap was valid, so release its packet reformat object */
1627         e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
1628         mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
1629 }
1630
1631 static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
1632 {
1633         return flow->attr->counter;
1634 }
1635
1636 /* Takes reference to all flows attached to encap and adds the flows to
1637  * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
1638  */
1639 void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
1640 {
1641         struct encap_flow_item *efi;
1642         struct mlx5e_tc_flow *flow;
1643
1644         list_for_each_entry(efi, &e->flows, list) {
1645                 flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
1646                 if (IS_ERR(mlx5e_flow_get(flow)))
1647                         continue;
1648                 wait_for_completion(&flow->init_done);
1649
1650                 flow->tmp_efi_index = efi->index;
1651                 list_add(&flow->tmp_list, flow_list);
1652         }
1653 }
1654
1655 /* Iterate over tmp_list of flows attached to flow_list head. */
1656 void mlx5e_put_encap_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list)
1657 {
1658         struct mlx5e_tc_flow *flow, *tmp;
1659
1660         list_for_each_entry_safe(flow, tmp, flow_list, tmp_list)
1661                 mlx5e_flow_put(priv, flow);
1662 }
1663
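/* Walk nhe->encap_list under RCU, starting after @e (or from the head when
 * @e is NULL), and take a reference to the next entry that can be taken.
 * The previous entry is released, and entries that do not reach the valid
 * state after initialization completes are skipped.
 */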
1664 static struct mlx5e_encap_entry *
1665 mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
1666                            struct mlx5e_encap_entry *e)
1667 {
1668         struct mlx5e_encap_entry *next = NULL;
1669
1670 retry:
1671         rcu_read_lock();
1672
1673         /* find encap with non-zero reference counter value */
1674         for (next = e ?
1675                      list_next_or_null_rcu(&nhe->encap_list,
1676                                            &e->encap_list,
1677                                            struct mlx5e_encap_entry,
1678                                            encap_list) :
1679                      list_first_or_null_rcu(&nhe->encap_list,
1680                                             struct mlx5e_encap_entry,
1681                                             encap_list);
1682              next;
1683              next = list_next_or_null_rcu(&nhe->encap_list,
1684                                           &next->encap_list,
1685                                           struct mlx5e_encap_entry,
1686                                           encap_list))
1687                 if (mlx5e_encap_take(next))
1688                         break;
1689
1690         rcu_read_unlock();
1691
1692         /* release starting encap */
1693         if (e)
1694                 mlx5e_encap_put(netdev_priv(e->out_dev), e);
1695         if (!next)
1696                 return next;
1697
1698         /* wait for encap to be fully initialized */
1699         wait_for_completion(&next->res_ready);
1700         /* continue searching if encap entry is not in valid state after completion */
1701         if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) {
1702                 e = next;
1703                 goto retry;
1704         }
1705
1706         return next;
1707 }
1708
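/* Check whether any offloaded flow attached to the neigh entry's encaps was
 * used (its HW counter advanced) since the last report; if so, look up the
 * neighbour and kick it with neigh_event_send() so it is kept up to date.
 */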
1709 void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
1710 {
1711         struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
1712         struct mlx5e_encap_entry *e = NULL;
1713         struct mlx5e_tc_flow *flow;
1714         struct mlx5_fc *counter;
1715         struct neigh_table *tbl;
1716         bool neigh_used = false;
1717         struct neighbour *n;
1718         u64 lastuse;
1719
1720         if (m_neigh->family == AF_INET)
1721                 tbl = &arp_tbl;
1722 #if IS_ENABLED(CONFIG_IPV6)
1723         else if (m_neigh->family == AF_INET6)
1724                 tbl = ipv6_stub->nd_tbl;
1725 #endif
1726         else
1727                 return;
1728
1729         /* mlx5e_get_next_valid_encap() releases previous encap before returning
1730          * next one.
1731          */
1732         while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
1733                 struct mlx5e_priv *priv = netdev_priv(e->out_dev);
1734                 struct encap_flow_item *efi, *tmp;
1735                 struct mlx5_eswitch *esw;
1736                 LIST_HEAD(flow_list);
1737
1738                 esw = priv->mdev->priv.eswitch;
1739                 mutex_lock(&esw->offloads.encap_tbl_lock);
1740                 list_for_each_entry_safe(efi, tmp, &e->flows, list) {
1741                         flow = container_of(efi, struct mlx5e_tc_flow,
1742                                             encaps[efi->index]);
1743                         if (IS_ERR(mlx5e_flow_get(flow)))
1744                                 continue;
1745                         list_add(&flow->tmp_list, &flow_list);
1746
1747                         if (mlx5e_is_offloaded_flow(flow)) {
1748                                 counter = mlx5e_tc_get_counter(flow);
1749                                 lastuse = mlx5_fc_query_lastuse(counter);
1750                                 if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
1751                                         neigh_used = true;
1752                                         break;
1753                                 }
1754                         }
1755                 }
1756                 mutex_unlock(&esw->offloads.encap_tbl_lock);
1757
1758                 mlx5e_put_encap_flow_list(priv, &flow_list);
1759                 if (neigh_used) {
1760                         /* release current encap before breaking the loop */
1761                         mlx5e_encap_put(priv, e);
1762                         break;
1763                 }
1764         }
1765
1766         trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);
1767
1768         if (neigh_used) {
1769                 nhe->reported_lastuse = jiffies;
1770
1771                 /* find the relevant neigh according to the cached device and
1772                  * dst ip pair
1773                  */
1774                 n = neigh_lookup(tbl, &m_neigh->dst_ip, m_neigh->dev);
1775                 if (!n)
1776                         return;
1777
1778                 neigh_event_send(n, NULL);
1779                 neigh_release(n);
1780         }
1781 }
1782
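/* Final teardown of an encap entry whose refcount dropped to zero: detach it
 * from the representor, release the packet reformat object if it was
 * offloaded, and free the entry after an RCU grace period.
 */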
1783 static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
1784 {
1785         WARN_ON(!list_empty(&e->flows));
1786
1787         if (e->compl_result > 0) {
1788                 mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
1789
1790                 if (e->flags & MLX5_ENCAP_ENTRY_VALID)
1791                         mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
1792         }
1793
1794         kfree(e->tun_info);
1795         kfree(e->encap_header);
1796         kfree_rcu(e, rcu);
1797 }
1798
1799 static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
1800                                 struct mlx5e_decap_entry *d)
1801 {
1802         WARN_ON(!list_empty(&d->flows));
1803
1804         if (!d->compl_result)
1805                 mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);
1806
1807         kfree_rcu(d, rcu);
1808 }
1809
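/* Drop a reference to an encap entry; the last reference also removes the
 * entry from the encap hash table (under encap_tbl_lock) and frees it.
 */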
1810 void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
1811 {
1812         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1813
1814         if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
1815                 return;
1816         hash_del_rcu(&e->encap_hlist);
1817         mutex_unlock(&esw->offloads.encap_tbl_lock);
1818
1819         mlx5e_encap_dealloc(priv, e);
1820 }
1821
1822 static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
1823 {
1824         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1825
1826         if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
1827                 return;
1828         hash_del_rcu(&d->hlist);
1829         mutex_unlock(&esw->offloads.decap_tbl_lock);
1830
1831         mlx5e_decap_dealloc(priv, d);
1832 }
1833
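/* Unlink the flow from the encap entry used by destination @out_index and
 * drop the flow's reference to that entry.
 */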
1834 static void mlx5e_detach_encap(struct mlx5e_priv *priv,
1835                                struct mlx5e_tc_flow *flow, int out_index)
1836 {
1837         struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
1838         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1839
1840         /* flow wasn't fully initialized */
1841         if (!e)
1842                 return;
1843
1844         mutex_lock(&esw->offloads.encap_tbl_lock);
1845         list_del(&flow->encaps[out_index].list);
1846         flow->encaps[out_index].e = NULL;
1847         if (!refcount_dec_and_test(&e->refcnt)) {
1848                 mutex_unlock(&esw->offloads.encap_tbl_lock);
1849                 return;
1850         }
1851         hash_del_rcu(&e->encap_hlist);
1852         mutex_unlock(&esw->offloads.encap_tbl_lock);
1853
1854         mlx5e_encap_dealloc(priv, e);
1855 }
1856
1857 static void mlx5e_detach_decap(struct mlx5e_priv *priv,
1858                                struct mlx5e_tc_flow *flow)
1859 {
1860         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1861         struct mlx5e_decap_entry *d = flow->decap_reformat;
1862
1863         if (!d)
1864                 return;
1865
1866         mutex_lock(&esw->offloads.decap_tbl_lock);
1867         list_del(&flow->l3_to_l2_reformat);
1868         flow->decap_reformat = NULL;
1869
1870         if (!refcount_dec_and_test(&d->refcnt)) {
1871                 mutex_unlock(&esw->offloads.decap_tbl_lock);
1872                 return;
1873         }
1874         hash_del_rcu(&d->hlist);
1875         mutex_unlock(&esw->offloads.decap_tbl_lock);
1876
1877         mlx5e_decap_dealloc(priv, d);
1878 }
1879
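/* For an eswitch flow that was duplicated to the peer eswitch, unlink the
 * duplicate and delete it once its last reference is dropped.
 */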
1880 static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
1881 {
1882         struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
1883
1884         if (!flow_flag_test(flow, ESWITCH) ||
1885             !flow_flag_test(flow, DUP))
1886                 return;
1887
1888         mutex_lock(&esw->offloads.peer_mutex);
1889         list_del(&flow->peer);
1890         mutex_unlock(&esw->offloads.peer_mutex);
1891
1892         flow_flag_clear(flow, DUP);
1893
1894         if (refcount_dec_and_test(&flow->peer_flow->refcnt)) {
1895                 mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
1896                 kfree(flow->peer_flow);
1897         }
1898
1899         flow->peer_flow = NULL;
1900 }
1901
1902 static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
1903 {
1904         struct mlx5_core_dev *dev = flow->priv->mdev;
1905         struct mlx5_devcom *devcom = dev->priv.devcom;
1906         struct mlx5_eswitch *peer_esw;
1907
1908         peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
1909         if (!peer_esw)
1910                 return;
1911
1912         __mlx5e_tc_del_fdb_peer_flow(flow);
1913         mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
1914 }
1915
1916 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
1917                               struct mlx5e_tc_flow *flow)
1918 {
1919         if (mlx5e_is_eswitch_flow(flow)) {
1920                 mlx5e_tc_del_fdb_peer_flow(flow);
1921                 mlx5e_tc_del_fdb_flow(priv, flow);
1922         } else {
1923                 mlx5e_tc_del_nic_flow(priv, flow);
1924         }
1925 }
1926
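/* A classifier rule forwards to another tc chain if any of its actions is a
 * goto.
 */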
1927 static bool flow_has_tc_fwd_action(struct flow_cls_offload *f)
1928 {
1929         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1930         struct flow_action *flow_action = &rule->action;
1931         const struct flow_action_entry *act;
1932         int i;
1933
1934         flow_action_for_each(i, act, flow_action) {
1935                 switch (act->id) {
1936                 case FLOW_ACTION_GOTO:
1937                         return true;
1938                 default:
1939                         continue;
1940                 }
1941         }
1942
1943         return false;
1944 }
1945
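/* Inspect the geneve option mask: report "don't care" when it is all zeros;
 * otherwise require a full match on the option class, type and data, since
 * partial matches on tunnel options aren't supported in chains > 0.
 */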
1946 static int
1947 enc_opts_is_dont_care_or_full_match(struct mlx5e_priv *priv,
1948                                     struct flow_dissector_key_enc_opts *opts,
1949                                     struct netlink_ext_ack *extack,
1950                                     bool *dont_care)
1951 {
1952         struct geneve_opt *opt;
1953         int off = 0;
1954
1955         *dont_care = true;
1956
1957         while (opts->len > off) {
1958                 opt = (struct geneve_opt *)&opts->data[off];
1959
1960                 if (!(*dont_care) || opt->opt_class || opt->type ||
1961                     memchr_inv(opt->opt_data, 0, opt->length * 4)) {
1962                         *dont_care = false;
1963
1964                         if (opt->opt_class != htons(U16_MAX) ||
1965                             opt->type != U8_MAX) {
1966                                 NL_SET_ERR_MSG(extack,
1967                                                "Partial match of tunnel options in chain > 0 isn't supported");
1968                                 netdev_warn(priv->netdev,
1969                                             "Partial match of tunnel options in chain > 0 isn't supported");
1970                                 return -EOPNOTSUPP;
1971                         }
1972                 }
1973
1974                 off += sizeof(struct geneve_opt) + opt->length * 4;
1975         }
1976
1977         return 0;
1978 }
1979
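/* Copy the dissector key identified by diss_key from the rule's match key
 * into dst.
 */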
1980 #define COPY_DISSECTOR(rule, diss_key, dst)\
1981 ({ \
1982         struct flow_rule *__rule = (rule);\
1983         typeof(dst) __dst = dst;\
1984 \
1985         memcpy(__dst,\
1986                skb_flow_dissector_target(__rule->match.dissector,\
1987                                          diss_key,\
1988                                          __rule->match.key),\
1989                sizeof(*__dst));\
1990 })
1991
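/* Map the flow's tunnel match (outer addresses, key id, filter ifindex and,
 * when fully masked, the tunnel options) to compact mapping ids. On chain 0
 * the combined id is written to the tunnel register through a mod_hdr
 * action; on higher chains the flow matches on the register value instead.
 */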
1992 static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv,
1993                                     struct mlx5e_tc_flow *flow,
1994                                     struct flow_cls_offload *f,
1995                                     struct net_device *filter_dev)
1996 {
1997         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1998         struct netlink_ext_ack *extack = f->common.extack;
1999         struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
2000         struct flow_match_enc_opts enc_opts_match;
2001         struct tunnel_match_enc_opts tun_enc_opts;
2002         struct mlx5_rep_uplink_priv *uplink_priv;
2003         struct mlx5_flow_attr *attr = flow->attr;
2004         struct mlx5e_rep_priv *uplink_rpriv;
2005         struct tunnel_match_key tunnel_key;
2006         bool enc_opts_is_dont_care = true;
2007         u32 tun_id, enc_opts_id = 0;
2008         struct mlx5_eswitch *esw;
2009         u32 value, mask;
2010         int err;
2011
2012         esw = priv->mdev->priv.eswitch;
2013         uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
2014         uplink_priv = &uplink_rpriv->uplink_priv;
2015
2016         memset(&tunnel_key, 0, sizeof(tunnel_key));
2017         COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL,
2018                        &tunnel_key.enc_control);
2019         if (tunnel_key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS)
2020                 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
2021                                &tunnel_key.enc_ipv4);
2022         else
2023                 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
2024                                &tunnel_key.enc_ipv6);
2025         COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IP, &tunnel_key.enc_ip);
2026         COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_PORTS,
2027                        &tunnel_key.enc_tp);
2028         COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_KEYID,
2029                        &tunnel_key.enc_key_id);
2030         tunnel_key.filter_ifindex = filter_dev->ifindex;
2031
2032         err = mapping_add(uplink_priv->tunnel_mapping, &tunnel_key, &tun_id);
2033         if (err)
2034                 return err;
2035
2036         flow_rule_match_enc_opts(rule, &enc_opts_match);
2037         err = enc_opts_is_dont_care_or_full_match(priv,
2038                                                   enc_opts_match.mask,
2039                                                   extack,
2040                                                   &enc_opts_is_dont_care);
2041         if (err)
2042                 goto err_enc_opts;
2043
2044         if (!enc_opts_is_dont_care) {
2045                 memset(&tun_enc_opts, 0, sizeof(tun_enc_opts));
2046                 memcpy(&tun_enc_opts.key, enc_opts_match.key,
2047                        sizeof(*enc_opts_match.key));
2048                 memcpy(&tun_enc_opts.mask, enc_opts_match.mask,
2049                        sizeof(*enc_opts_match.mask));
2050
2051                 err = mapping_add(uplink_priv->tunnel_enc_opts_mapping,
2052                                   &tun_enc_opts, &enc_opts_id);
2053                 if (err)
2054                         goto err_enc_opts;
2055         }
2056
2057         value = tun_id << ENC_OPTS_BITS | enc_opts_id;
2058         mask = enc_opts_id ? TUNNEL_ID_MASK :
2059                              (TUNNEL_ID_MASK & ~ENC_OPTS_BITS_MASK);
2060
2061         if (attr->chain) {
2062                 mlx5e_tc_match_to_reg_match(&attr->parse_attr->spec,
2063                                             TUNNEL_TO_REG, value, mask);
2064         } else {
2065                 mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
2066                 err = mlx5e_tc_match_to_reg_set(priv->mdev,
2067                                                 mod_hdr_acts, MLX5_FLOW_NAMESPACE_FDB,
2068                                                 TUNNEL_TO_REG, value);
2069                 if (err)
2070                         goto err_set;
2071
2072                 attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
2073         }
2074
2075         flow->tunnel_id = value;
2076         return 0;
2077
2078 err_set:
2079         if (enc_opts_id)
2080                 mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
2081                                enc_opts_id);
2082 err_enc_opts:
2083         mapping_remove(uplink_priv->tunnel_mapping, tun_id);
2084         return err;
2085 }
2086
2087 static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow)
2088 {
2089         u32 enc_opts_id = flow->tunnel_id & ENC_OPTS_BITS_MASK;
2090         u32 tun_id = flow->tunnel_id >> ENC_OPTS_BITS;
2091         struct mlx5_rep_uplink_priv *uplink_priv;
2092         struct mlx5e_rep_priv *uplink_rpriv;
2093         struct mlx5_eswitch *esw;
2094
2095         esw = flow->priv->mdev->priv.eswitch;
2096         uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
2097         uplink_priv = &uplink_rpriv->uplink_priv;
2098
2099         if (tun_id)
2100                 mapping_remove(uplink_priv->tunnel_mapping, tun_id);
2101         if (enc_opts_id)
2102                 mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
2103                                enc_opts_id);
2104 }
2105
2106 u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow)
2107 {
2108         return flow->tunnel_id;
2109 }
2110
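/* Match on ip_version when the device supports it for the relevant header
 * set and the rule fully matches on an IPv4/IPv6 ethertype; otherwise fall
 * back to matching on the ethertype field itself.
 */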
2111 void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev,
2112                             struct flow_match_basic *match, bool outer,
2113                             void *headers_c, void *headers_v)
2114 {
2115         bool ip_version_cap;
2116
2117         ip_version_cap = outer ?
2118                 MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2119                                           ft_field_support.outer_ip_version) :
2120                 MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2121                                           ft_field_support.inner_ip_version);
2122
2123         if (ip_version_cap && match->mask->n_proto == htons(0xFFFF) &&
2124             (match->key->n_proto == htons(ETH_P_IP) ||
2125              match->key->n_proto == htons(ETH_P_IPV6))) {
2126                 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_version);
2127                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version,
2128                          match->key->n_proto == htons(ETH_P_IP) ? 4 : 6);
2129         } else {
2130                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
2131                          ntohs(match->mask->n_proto));
2132                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
2133                          ntohs(match->key->n_proto));
2134         }
2135 }
2136
2137 static u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer)
2138 {
2139         void *headers_v;
2140         u16 ethertype;
2141         u8 ip_version;
2142
2143         if (outer)
2144                 headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
2145         else
2146                 headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, inner_headers);
2147
2148         ip_version = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_version);
2149         /* Fall back to deriving the IP version from the ethertype when it isn't set */
2150         if (!ip_version) {
2151                 ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
2152                 if (ethertype == ETH_P_IP || ethertype == ETH_P_ARP)
2153                         ip_version = 4;
2154                 else if (ethertype == ETH_P_IPV6)
2155                         ip_version = 6;
2156         }
2157         return ip_version;
2158 }
2159
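/* Allocate the esw attr's rx_tun_attr and record the tunnel outer source and
 * destination addresses from the match value.
 */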
2160 static int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
2161                                     struct mlx5_flow_spec *spec)
2162 {
2163         struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
2164         struct mlx5_rx_tun_attr *tun_attr;
2165         void *daddr, *saddr;
2166         u8 ip_version;
2167
2168         tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL);
2169         if (!tun_attr)
2170                 return -ENOMEM;
2171
2172         esw_attr->rx_tun_attr = tun_attr;
2173         ip_version = mlx5e_tc_get_ip_version(spec, true);
2174
2175         if (ip_version == 4) {
2176                 daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2177                                      outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
2178                 saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2179                                      outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
2180                 tun_attr->dst_ip.v4 = *(__be32 *)daddr;
2181                 tun_attr->src_ip.v4 = *(__be32 *)saddr;
2182         }
2183 #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
2184         else if (ip_version == 6) {
2185                 int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
2186
2187                 daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2188                                      outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
2189                 saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2190                                      outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
2191                 memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
2192                 memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
2193         }
2194 #endif
2195         return 0;
2196 }
2197
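/* Parse the tunnel part of the match. On chain 0 the tunnel headers are
 * matched directly and a decap action is added unless the filter device is
 * bareudp; when chains are used, the tunnel match is carried through the
 * tunnel id register instead, which requires reg_c1 loopback support.
 */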
2198 static int parse_tunnel_attr(struct mlx5e_priv *priv,
2199                              struct mlx5e_tc_flow *flow,
2200                              struct mlx5_flow_spec *spec,
2201                              struct flow_cls_offload *f,
2202                              struct net_device *filter_dev,
2203                              u8 *match_level,
2204                              bool *match_inner)
2205 {
2206         struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev);
2207         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
2208         struct netlink_ext_ack *extack = f->common.extack;
2209         bool needs_mapping, sets_mapping;
2210         int err;
2211
2212         if (!mlx5e_is_eswitch_flow(flow))
2213                 return -EOPNOTSUPP;
2214
2215         needs_mapping = !!flow->attr->chain;
2216         sets_mapping = !flow->attr->chain && flow_has_tc_fwd_action(f);
2217         *match_inner = !needs_mapping;
2218
2219         if ((needs_mapping || sets_mapping) &&
2220             !mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
2221                 NL_SET_ERR_MSG(extack,
2222                                "Chains on tunnel devices aren't supported without register loopback support");
2223                 netdev_warn(priv->netdev,
2224                             "Chains on tunnel devices aren't supported without register loopback support");
2225                 return -EOPNOTSUPP;
2226         }
2227
2228         if (!flow->attr->chain) {
2229                 err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
2230                                          match_level);
2231                 if (err) {
2232                         NL_SET_ERR_MSG_MOD(extack,
2233                                            "Failed to parse tunnel attributes");
2234                         netdev_warn(priv->netdev,
2235                                     "Failed to parse tunnel attributes");
2236                         return err;
2237                 }
2238
2239                 /* With mpls over udp we decapsulate using packet reformat
2240                  * object
2241                  */
2242                 if (!netif_is_bareudp(filter_dev))
2243                         flow->attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
2244                 err = mlx5e_tc_set_attr_rx_tun(flow, spec);
2245                 if (err)
2246                         return err;
2247         } else if (tunnel && tunnel->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) {
2248                 struct mlx5_flow_spec *tmp_spec;
2249
2250                 tmp_spec = kvzalloc(sizeof(*tmp_spec), GFP_KERNEL);
2251                 if (!tmp_spec) {
2252                         NL_SET_ERR_MSG_MOD(extack, "Failed to allocate memory for vxlan tmp spec");
2253                         netdev_warn(priv->netdev, "Failed to allocate memory for vxlan tmp spec");
2254                         return -ENOMEM;
2255                 }
2256                 memcpy(tmp_spec, spec, sizeof(*tmp_spec));
2257
2258                 err = mlx5e_tc_tun_parse(filter_dev, priv, tmp_spec, f, match_level);
2259                 if (err) {
2260                         kvfree(tmp_spec);
2261                         NL_SET_ERR_MSG_MOD(extack, "Failed to parse tunnel attributes");
2262                         netdev_warn(priv->netdev, "Failed to parse tunnel attributes");
2263                         return err;
2264                 }
2265                 err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec);
2266                 kvfree(tmp_spec);
2267                 if (err)
2268                         return err;
2269         }
2270
2271         if (!needs_mapping && !sets_mapping)
2272                 return 0;
2273
2274         return mlx5e_get_flow_tunnel_id(priv, flow, f, filter_dev);
2275 }
2276
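/* Helpers that pick the inner or outer header layout of the match spec;
 * flows that decapsulate match on the inner headers.
 */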
2277 static void *get_match_inner_headers_criteria(struct mlx5_flow_spec *spec)
2278 {
2279         return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2280                             inner_headers);
2281 }
2282
2283 static void *get_match_inner_headers_value(struct mlx5_flow_spec *spec)
2284 {
2285         return MLX5_ADDR_OF(fte_match_param, spec->match_value,
2286                             inner_headers);
2287 }
2288
2289 static void *get_match_outer_headers_criteria(struct mlx5_flow_spec *spec)
2290 {
2291         return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2292                             outer_headers);
2293 }
2294
2295 static void *get_match_outer_headers_value(struct mlx5_flow_spec *spec)
2296 {
2297         return MLX5_ADDR_OF(fte_match_param, spec->match_value,
2298                             outer_headers);
2299 }
2300
2301 static void *get_match_headers_value(u32 flags,
2302                                      struct mlx5_flow_spec *spec)
2303 {
2304         return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
2305                 get_match_inner_headers_value(spec) :
2306                 get_match_outer_headers_value(spec);
2307 }
2308
2309 static void *get_match_headers_criteria(u32 flags,
2310                                         struct mlx5_flow_spec *spec)
2311 {
2312         return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
2313                 get_match_inner_headers_criteria(spec) :
2314                 get_match_outer_headers_criteria(spec);
2315 }
2316
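/* Meta (ingress_ifindex) matching is only accepted as an exact match on the
 * filter device itself.
 */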
2317 static int mlx5e_flower_parse_meta(struct net_device *filter_dev,
2318                                    struct flow_cls_offload *f)
2319 {
2320         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2321         struct netlink_ext_ack *extack = f->common.extack;
2322         struct net_device *ingress_dev;
2323         struct flow_match_meta match;
2324
2325         if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META))
2326                 return 0;
2327
2328         flow_rule_match_meta(rule, &match);
2329         if (match.mask->ingress_ifindex != 0xFFFFFFFF) {
2330                 NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask");
2331                 return -EOPNOTSUPP;
2332         }
2333
2334         ingress_dev = __dev_get_by_index(dev_net(filter_dev),
2335                                          match.key->ingress_ifindex);
2336         if (!ingress_dev) {
2337                 NL_SET_ERR_MSG_MOD(extack,
2338                                    "Can't find the ingress port to match on");
2339                 return -ENOENT;
2340         }
2341
2342         if (ingress_dev != filter_dev) {
2343                 NL_SET_ERR_MSG_MOD(extack,
2344                                    "Can't match on the ingress filter port");
2345                 return -EOPNOTSUPP;
2346         }
2347
2348         return 0;
2349 }
2350
2351 static bool skip_key_basic(struct net_device *filter_dev,
2352                            struct flow_cls_offload *f)
2353 {
2354         /* When doing mpls over udp decap, the user needs to provide
2355          * MPLS_UC as the protocol in order to be able to match on mpls
2356          * label fields.  However, the actual ethertype is IP so we want to
2357          * avoid matching on this, otherwise we'll fail the match.
2358          */
2359         if (netif_is_bareudp(filter_dev) && f->common.chain_index == 0)
2360                 return true;
2361
2362         return false;
2363 }
2364
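/* Translate a flower classifier match into the mlx5 flow spec. Tunnel
 * matches are parsed first so that later header matches land on the inner
 * headers when the flow decapsulates; the achieved match level is reported
 * separately for the inner and outer headers.
 */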
2365 static int __parse_cls_flower(struct mlx5e_priv *priv,
2366                               struct mlx5e_tc_flow *flow,
2367                               struct mlx5_flow_spec *spec,
2368                               struct flow_cls_offload *f,
2369                               struct net_device *filter_dev,
2370                               u8 *inner_match_level, u8 *outer_match_level)
2371 {
2372         struct netlink_ext_ack *extack = f->common.extack;
2373         void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2374                                        outer_headers);
2375         void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2376                                        outer_headers);
2377         void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2378                                     misc_parameters);
2379         void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2380                                     misc_parameters);
2381         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2382         struct flow_dissector *dissector = rule->match.dissector;
2383         u16 addr_type = 0;
2384         u8 ip_proto = 0;
2385         u8 *match_level;
2386         int err;
2387
2388         match_level = outer_match_level;
2389
2390         if (dissector->used_keys &
2391             ~(BIT(FLOW_DISSECTOR_KEY_META) |
2392               BIT(FLOW_DISSECTOR_KEY_CONTROL) |
2393               BIT(FLOW_DISSECTOR_KEY_BASIC) |
2394               BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
2395               BIT(FLOW_DISSECTOR_KEY_VLAN) |
2396               BIT(FLOW_DISSECTOR_KEY_CVLAN) |
2397               BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
2398               BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
2399               BIT(FLOW_DISSECTOR_KEY_PORTS) |
2400               BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
2401               BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
2402               BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
2403               BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) |
2404               BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
2405               BIT(FLOW_DISSECTOR_KEY_TCP) |
2406               BIT(FLOW_DISSECTOR_KEY_IP)  |
2407               BIT(FLOW_DISSECTOR_KEY_CT) |
2408               BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
2409               BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) |
2410               BIT(FLOW_DISSECTOR_KEY_MPLS))) {
2411                 NL_SET_ERR_MSG_MOD(extack, "Unsupported key");
2412                 netdev_dbg(priv->netdev, "Unsupported key used: 0x%x\n",
2413                            dissector->used_keys);
2414                 return -EOPNOTSUPP;
2415         }
2416
2417         if (mlx5e_get_tc_tun(filter_dev)) {
2418                 bool match_inner = false;
2419
2420                 err = parse_tunnel_attr(priv, flow, spec, f, filter_dev,
2421                                         outer_match_level, &match_inner);
2422                 if (err)
2423                         return err;
2424
2425                 if (match_inner) {
2426                         /* header pointers should point to the inner headers
2427                          * if the packet was decapsulated already.
2428                          * outer headers are set by parse_tunnel_attr.
2429                          */
2430                         match_level = inner_match_level;
2431                         headers_c = get_match_inner_headers_criteria(spec);
2432                         headers_v = get_match_inner_headers_value(spec);
2433                 }
2434         }
2435
2436         err = mlx5e_flower_parse_meta(filter_dev, f);
2437         if (err)
2438                 return err;
2439
2440         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC) &&
2441             !skip_key_basic(filter_dev, f)) {
2442                 struct flow_match_basic match;
2443
2444                 flow_rule_match_basic(rule, &match);
2445                 mlx5e_tc_set_ethertype(priv->mdev, &match,
2446                                        match_level == outer_match_level,
2447                                        headers_c, headers_v);
2448
2449                 if (match.mask->n_proto)
2450                         *match_level = MLX5_MATCH_L2;
2451         }
2452         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN) ||
2453             is_vlan_dev(filter_dev)) {
2454                 struct flow_dissector_key_vlan filter_dev_mask;
2455                 struct flow_dissector_key_vlan filter_dev_key;
2456                 struct flow_match_vlan match;
2457
2458                 if (is_vlan_dev(filter_dev)) {
2459                         match.key = &filter_dev_key;
2460                         match.key->vlan_id = vlan_dev_vlan_id(filter_dev);
2461                         match.key->vlan_tpid = vlan_dev_vlan_proto(filter_dev);
2462                         match.key->vlan_priority = 0;
2463                         match.mask = &filter_dev_mask;
2464                         memset(match.mask, 0xff, sizeof(*match.mask));
2465                         match.mask->vlan_priority = 0;
2466                 } else {
2467                         flow_rule_match_vlan(rule, &match);
2468                 }
2469                 if (match.mask->vlan_id ||
2470                     match.mask->vlan_priority ||
2471                     match.mask->vlan_tpid) {
2472                         if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
2473                                 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2474                                          svlan_tag, 1);
2475                                 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2476                                          svlan_tag, 1);
2477                         } else {
2478                                 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2479                                          cvlan_tag, 1);
2480                                 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2481                                          cvlan_tag, 1);
2482                         }
2483
2484                         MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid,
2485                                  match.mask->vlan_id);
2486                         MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid,
2487                                  match.key->vlan_id);
2488
2489                         MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio,
2490                                  match.mask->vlan_priority);
2491                         MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio,
2492                                  match.key->vlan_priority);
2493
2494                         *match_level = MLX5_MATCH_L2;
2495                 }
2496         } else if (*match_level != MLX5_MATCH_NONE) {
2497                 /* cvlan_tag enabled in the match criteria and
2498                  * disabled in the match value means both the S-tag and
2499                  * the C-tag are absent (the packet is untagged)
2500                  */
2501                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
2502                 *match_level = MLX5_MATCH_L2;
2503         }
2504
2505         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
2506                 struct flow_match_vlan match;
2507
2508                 flow_rule_match_cvlan(rule, &match);
2509                 if (match.mask->vlan_id ||
2510                     match.mask->vlan_priority ||
2511                     match.mask->vlan_tpid) {
2512                         if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
2513                                 MLX5_SET(fte_match_set_misc, misc_c,
2514                                          outer_second_svlan_tag, 1);
2515                                 MLX5_SET(fte_match_set_misc, misc_v,
2516                                          outer_second_svlan_tag, 1);
2517                         } else {
2518                                 MLX5_SET(fte_match_set_misc, misc_c,
2519                                          outer_second_cvlan_tag, 1);
2520                                 MLX5_SET(fte_match_set_misc, misc_v,
2521                                          outer_second_cvlan_tag, 1);
2522                         }
2523
2524                         MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid,
2525                                  match.mask->vlan_id);
2526                         MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid,
2527                                  match.key->vlan_id);
2528                         MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio,
2529                                  match.mask->vlan_priority);
2530                         MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio,
2531                                  match.key->vlan_priority);
2532
2533                         *match_level = MLX5_MATCH_L2;
2534                         spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
2535                 }
2536         }
2537
2538         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
2539                 struct flow_match_eth_addrs match;
2540
2541                 flow_rule_match_eth_addrs(rule, &match);
2542                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2543                                              dmac_47_16),
2544                                 match.mask->dst);
2545                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2546                                              dmac_47_16),
2547                                 match.key->dst);
2548
2549                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2550                                              smac_47_16),
2551                                 match.mask->src);
2552                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2553                                              smac_47_16),
2554                                 match.key->src);
2555
2556                 if (!is_zero_ether_addr(match.mask->src) ||
2557                     !is_zero_ether_addr(match.mask->dst))
2558                         *match_level = MLX5_MATCH_L2;
2559         }
2560
2561         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
2562                 struct flow_match_control match;
2563
2564                 flow_rule_match_control(rule, &match);
2565                 addr_type = match.key->addr_type;
2566
2567                 /* the HW doesn't support frag first/later */
2568                 if (match.mask->flags & FLOW_DIS_FIRST_FRAG)
2569                         return -EOPNOTSUPP;
2570
2571                 if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) {
2572                         MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
2573                         MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
2574                                  match.key->flags & FLOW_DIS_IS_FRAGMENT);
2575
2576                         /* the HW doesn't need L3 inline to match on frag=no */
2577                         if (!(match.key->flags & FLOW_DIS_IS_FRAGMENT))
2578                                 *match_level = MLX5_MATCH_L2;
2579                         else
2580                                 *match_level = MLX5_MATCH_L3;
2581                 }
2582         }
2583         /* ***  L2 attributes parsing up to here *** */
2584
2585         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
2586                 struct flow_match_basic match;
2587
2588                 flow_rule_match_basic(rule, &match);
2589                 ip_proto = match.key->ip_proto;
2590
2591                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
2592                          match.mask->ip_proto);
2593                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
2594                          match.key->ip_proto);
2595
2596                 if (match.mask->ip_proto)
2597                         *match_level = MLX5_MATCH_L3;
2598         }
2599
2600         if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
2601                 struct flow_match_ipv4_addrs match;
2602
2603                 flow_rule_match_ipv4_addrs(rule, &match);
2604                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2605                                     src_ipv4_src_ipv6.ipv4_layout.ipv4),
2606                        &match.mask->src, sizeof(match.mask->src));
2607                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2608                                     src_ipv4_src_ipv6.ipv4_layout.ipv4),
2609                        &match.key->src, sizeof(match.key->src));
2610                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2611                                     dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2612                        &match.mask->dst, sizeof(match.mask->dst));
2613                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2614                                     dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2615                        &match.key->dst, sizeof(match.key->dst));
2616
2617                 if (match.mask->src || match.mask->dst)
2618                         *match_level = MLX5_MATCH_L3;
2619         }
2620
2621         if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
2622                 struct flow_match_ipv6_addrs match;
2623
2624                 flow_rule_match_ipv6_addrs(rule, &match);
2625                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2626                                     src_ipv4_src_ipv6.ipv6_layout.ipv6),
2627                        &match.mask->src, sizeof(match.mask->src));
2628                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2629                                     src_ipv4_src_ipv6.ipv6_layout.ipv6),
2630                        &match.key->src, sizeof(match.key->src));
2631
2632                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2633                                     dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2634                        &match.mask->dst, sizeof(match.mask->dst));
2635                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2636                                     dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2637                        &match.key->dst, sizeof(match.key->dst));
2638
2639                 if (ipv6_addr_type(&match.mask->src) != IPV6_ADDR_ANY ||
2640                     ipv6_addr_type(&match.mask->dst) != IPV6_ADDR_ANY)
2641                         *match_level = MLX5_MATCH_L3;
2642         }
2643
2644         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
2645                 struct flow_match_ip match;
2646
2647                 flow_rule_match_ip(rule, &match);
2648                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
2649                          match.mask->tos & 0x3);
2650                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
2651                          match.key->tos & 0x3);
2652
2653                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
2654                          match.mask->tos >> 2);
2655                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
2656                          match.key->tos  >> 2);
2657
2658                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
2659                          match.mask->ttl);
2660                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
2661                          match.key->ttl);
2662
2663                 if (match.mask->ttl &&
2664                     !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
2665                                                 ft_field_support.outer_ipv4_ttl)) {
2666                         NL_SET_ERR_MSG_MOD(extack,
2667                                            "Matching on TTL is not supported");
2668                         return -EOPNOTSUPP;
2669                 }
2670
2671                 if (match.mask->tos || match.mask->ttl)
2672                         *match_level = MLX5_MATCH_L3;
2673         }
2674
2675         /* ***  L3 attributes parsing up to here *** */
2676
2677         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
2678                 struct flow_match_ports match;
2679
2680                 flow_rule_match_ports(rule, &match);
2681                 switch (ip_proto) {
2682                 case IPPROTO_TCP:
2683                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2684                                  tcp_sport, ntohs(match.mask->src));
2685                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2686                                  tcp_sport, ntohs(match.key->src));
2687
2688                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2689                                  tcp_dport, ntohs(match.mask->dst));
2690                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2691                                  tcp_dport, ntohs(match.key->dst));
2692                         break;
2693
2694                 case IPPROTO_UDP:
2695                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2696                                  udp_sport, ntohs(match.mask->src));
2697                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2698                                  udp_sport, ntohs(match.key->src));
2699
2700                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2701                                  udp_dport, ntohs(match.mask->dst));
2702                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2703                                  udp_dport, ntohs(match.key->dst));
2704                         break;
2705                 default:
2706                         NL_SET_ERR_MSG_MOD(extack,
2707                                            "Only UDP and TCP transports are supported for L4 matching");
2708                         netdev_err(priv->netdev,
2709                                    "Only UDP and TCP transports are supported\n");
2710                         return -EINVAL;
2711                 }
2712
2713                 if (match.mask->src || match.mask->dst)
2714                         *match_level = MLX5_MATCH_L4;
2715         }
2716
2717         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
2718                 struct flow_match_tcp match;
2719
2720                 flow_rule_match_tcp(rule, &match);
2721                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
2722                          ntohs(match.mask->flags));
2723                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
2724                          ntohs(match.key->flags));
2725
2726                 if (match.mask->flags)
2727                         *match_level = MLX5_MATCH_L4;
2728         }
2729
2730         return 0;
2731 }
2732
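/* Top-level flower match parser: fills the flow spec from the dissector keys
 * and, for eswitch flows on non-uplink reps, verifies that the eswitch
 * min-inline mode covers the non-tunnel match level of the rule.
 */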
2733 static int parse_cls_flower(struct mlx5e_priv *priv,
2734                             struct mlx5e_tc_flow *flow,
2735                             struct mlx5_flow_spec *spec,
2736                             struct flow_cls_offload *f,
2737                             struct net_device *filter_dev)
2738 {
2739         u8 inner_match_level, outer_match_level, non_tunnel_match_level;
2740         struct netlink_ext_ack *extack = f->common.extack;
2741         struct mlx5_core_dev *dev = priv->mdev;
2742         struct mlx5_eswitch *esw = dev->priv.eswitch;
2743         struct mlx5e_rep_priv *rpriv = priv->ppriv;
2744         struct mlx5_eswitch_rep *rep;
2745         bool is_eswitch_flow;
2746         int err;
2747
2748         inner_match_level = MLX5_MATCH_NONE;
2749         outer_match_level = MLX5_MATCH_NONE;
2750
2751         err = __parse_cls_flower(priv, flow, spec, f, filter_dev,
2752                                  &inner_match_level, &outer_match_level);
2753         non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ?
2754                                  outer_match_level : inner_match_level;
2755
2756         is_eswitch_flow = mlx5e_is_eswitch_flow(flow);
2757         if (!err && is_eswitch_flow) {
2758                 rep = rpriv->rep;
2759                 if (rep->vport != MLX5_VPORT_UPLINK &&
2760                     (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
2761                     esw->offloads.inline_mode < non_tunnel_match_level)) {
2762                         NL_SET_ERR_MSG_MOD(extack,
2763                                            "Flow is not offloaded due to min inline setting");
2764                         netdev_warn(priv->netdev,
2765                                     "Flow is not offloaded due to min inline setting, required %d actual %d\n",
2766                                     non_tunnel_match_level, esw->offloads.inline_mode);
2767                         return -EOPNOTSUPP;
2768                 }
2769         }
2770
2771         flow->attr->inner_match_level = inner_match_level;
2772         flow->attr->outer_match_level = outer_match_level;
2773
2775         return err;
2776 }
2777
2778 struct pedit_headers {
2779         struct ethhdr  eth;
2780         struct vlan_hdr vlan;
2781         struct iphdr   ip4;
2782         struct ipv6hdr ip6;
2783         struct tcphdr  tcp;
2784         struct udphdr  udp;
2785 };
2786
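/* Per-command pedit accumulator: callers use an array of two of these, where
 * index 0 collects FLOW_ACTION_MANGLE (set) pedits and index 1 collects
 * FLOW_ACTION_ADD pedits.
 */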
2787 struct pedit_headers_action {
2788         struct pedit_headers    vals;
2789         struct pedit_headers    masks;
2790         u32                     pedits;
2791 };
2792
2793 static int pedit_header_offsets[] = {
2794         [FLOW_ACT_MANGLE_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth),
2795         [FLOW_ACT_MANGLE_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4),
2796         [FLOW_ACT_MANGLE_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6),
2797         [FLOW_ACT_MANGLE_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp),
2798         [FLOW_ACT_MANGLE_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp),
2799 };
2800
2801 #define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype])
2802
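/* Record one 32-bit pedit mask/value pair at @offset within the shadow header
 * selected by @hdr_type; acting twice on the same bits is not allowed.
 */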
2803 static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset,
2804                          struct pedit_headers_action *hdrs)
2805 {
2806         u32 *curr_pmask, *curr_pval;
2807
2808         curr_pmask = (u32 *)(pedit_header(&hdrs->masks, hdr_type) + offset);
2809         curr_pval  = (u32 *)(pedit_header(&hdrs->vals, hdr_type) + offset);
2810
2811         if (*curr_pmask & mask)  /* disallow acting twice on the same location */
2812                 goto out_err;
2813
2814         *curr_pmask |= mask;
2815         *curr_pval  |= (val & mask);
2816
2817         return 0;
2818
2819 out_err:
2820         return -EOPNOTSUPP;
2821 }
2822
2823 struct mlx5_fields {
2824         u8  field;
2825         u8  field_bsize;
2826         u32 field_mask;
2827         u32 offset;
2828         u32 match_offset;
2829 };
2830
2831 #define OFFLOAD(fw_field, field_bsize, field_mask, field, off, match_field) \
2832                 {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, field_bsize, field_mask, \
2833                  offsetof(struct pedit_headers, field) + (off), \
2834                  MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)}
2835
2836 /* True when the masked value being written equals the value already matched
2837  * on, and the rewrite mask does not cover any bits outside the match mask.
2838  */
2839 #define SAME_VAL_MASK(type, valp, maskp, matchvalp, matchmaskp) ({ \
2840         type matchmaskx = *(type *)(matchmaskp); \
2841         type matchvalx = *(type *)(matchvalp); \
2842         type maskx = *(type *)(maskp); \
2843         type valx = *(type *)(valp); \
2844         \
2845         (valx & maskx) == (matchvalx & matchmaskx) && !(maskx & (maskx ^ \
2846                                                                  matchmaskx)); \
2847 })
2848
2849 static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp,
2850                          void *matchmaskp, u8 bsize)
2851 {
2852         bool same = false;
2853
2854         switch (bsize) {
2855         case 8:
2856                 same = SAME_VAL_MASK(u8, valp, maskp, matchvalp, matchmaskp);
2857                 break;
2858         case 16:
2859                 same = SAME_VAL_MASK(u16, valp, maskp, matchvalp, matchmaskp);
2860                 break;
2861         case 32:
2862                 same = SAME_VAL_MASK(u32, valp, maskp, matchvalp, matchmaskp);
2863                 break;
2864         }
2865
2866         return same;
2867 }
2868
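/* Supported rewrite fields: for each one, the HW modify-header field id, its
 * size and mask within the pedit shadow headers, and the byte offset of the
 * corresponding match field in fte_match_set_lyr_2_4.
 */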
2869 static struct mlx5_fields fields[] = {
2870         OFFLOAD(DMAC_47_16, 32, U32_MAX, eth.h_dest[0], 0, dmac_47_16),
2871         OFFLOAD(DMAC_15_0,  16, U16_MAX, eth.h_dest[4], 0, dmac_15_0),
2872         OFFLOAD(SMAC_47_16, 32, U32_MAX, eth.h_source[0], 0, smac_47_16),
2873         OFFLOAD(SMAC_15_0,  16, U16_MAX, eth.h_source[4], 0, smac_15_0),
2874         OFFLOAD(ETHERTYPE,  16, U16_MAX, eth.h_proto, 0, ethertype),
2875         OFFLOAD(FIRST_VID,  16, U16_MAX, vlan.h_vlan_TCI, 0, first_vid),
2876
2877         OFFLOAD(IP_DSCP, 8,    0xfc, ip4.tos,   0, ip_dscp),
2878         OFFLOAD(IP_TTL,  8,  U8_MAX, ip4.ttl,   0, ttl_hoplimit),
2879         OFFLOAD(SIPV4,  32, U32_MAX, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4),
2880         OFFLOAD(DIPV4,  32, U32_MAX, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2881
2882         OFFLOAD(SIPV6_127_96, 32, U32_MAX, ip6.saddr.s6_addr32[0], 0,
2883                 src_ipv4_src_ipv6.ipv6_layout.ipv6[0]),
2884         OFFLOAD(SIPV6_95_64,  32, U32_MAX, ip6.saddr.s6_addr32[1], 0,
2885                 src_ipv4_src_ipv6.ipv6_layout.ipv6[4]),
2886         OFFLOAD(SIPV6_63_32,  32, U32_MAX, ip6.saddr.s6_addr32[2], 0,
2887                 src_ipv4_src_ipv6.ipv6_layout.ipv6[8]),
2888         OFFLOAD(SIPV6_31_0,   32, U32_MAX, ip6.saddr.s6_addr32[3], 0,
2889                 src_ipv4_src_ipv6.ipv6_layout.ipv6[12]),
2890         OFFLOAD(DIPV6_127_96, 32, U32_MAX, ip6.daddr.s6_addr32[0], 0,
2891                 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[0]),
2892         OFFLOAD(DIPV6_95_64,  32, U32_MAX, ip6.daddr.s6_addr32[1], 0,
2893                 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[4]),
2894         OFFLOAD(DIPV6_63_32,  32, U32_MAX, ip6.daddr.s6_addr32[2], 0,
2895                 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[8]),
2896         OFFLOAD(DIPV6_31_0,   32, U32_MAX, ip6.daddr.s6_addr32[3], 0,
2897                 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]),
2898         OFFLOAD(IPV6_HOPLIMIT, 8,  U8_MAX, ip6.hop_limit, 0, ttl_hoplimit),
2899         OFFLOAD(IP_DSCP, 16,  0xc00f, ip6, 0, ip_dscp),
2900
2901         OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source,  0, tcp_sport),
2902         OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest,    0, tcp_dport),
2903         /* in the linux tcphdr, the tcp flags field is 8 bits long */
2904         OFFLOAD(TCP_FLAGS,  8,  U8_MAX, tcp.ack_seq, 5, tcp_flags),
2905
2906         OFFLOAD(UDP_SPORT, 16, U16_MAX, udp.source, 0, udp_sport),
2907         OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest,   0, udp_dport),
2908 };
2909
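/* Convert a 16/32-bit big-endian field mask to little-endian so the
 * find_*_bit() scans in offload_pedit_fields() see the bits in a consistent
 * order.
 */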
2910 static unsigned long mask_to_le(unsigned long mask, int size)
2911 {
2912         __be32 mask_be32;
2913         __be16 mask_be16;
2914
2915         if (size == 32) {
2916                 mask_be32 = (__force __be32)(mask);
2917                 mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
2918         } else if (size == 16) {
2919                 mask_be32 = (__force __be32)(mask);
2920                 mask_be16 = *(__be16 *)&mask_be32;
2921                 mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
2922         }
2923
2924         return mask;
2925 }

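/* Translate the accumulated pedit set/add masks into HW modify-header
 * actions. A set whose value is already guaranteed by an identical match, or
 * an add of zero, is skipped; setting and adding to the same HW field, or
 * rewriting a non-contiguous group of bits within one field, is rejected.
 */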
2926 static int offload_pedit_fields(struct mlx5e_priv *priv,
2927                                 int namespace,
2928                                 struct pedit_headers_action *hdrs,
2929                                 struct mlx5e_tc_flow_parse_attr *parse_attr,
2930                                 u32 *action_flags,
2931                                 struct netlink_ext_ack *extack)
2932 {
2933         struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
2934         int i, action_size, first, last, next_z;
2935         void *headers_c, *headers_v, *action, *vals_p;
2936         u32 *s_masks_p, *a_masks_p, s_mask, a_mask;
2937         struct mlx5e_tc_mod_hdr_acts *mod_acts;
2938         struct mlx5_fields *f;
2939         unsigned long mask, field_mask;
2940         int err;
2941         u8 cmd;
2942
2943         mod_acts = &parse_attr->mod_hdr_acts;
2944         headers_c = get_match_headers_criteria(*action_flags, &parse_attr->spec);
2945         headers_v = get_match_headers_value(*action_flags, &parse_attr->spec);
2946
2947         set_masks = &hdrs[0].masks;
2948         add_masks = &hdrs[1].masks;
2949         set_vals = &hdrs[0].vals;
2950         add_vals = &hdrs[1].vals;
2951
2952         action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto);
2953
2954         for (i = 0; i < ARRAY_SIZE(fields); i++) {
2955                 bool skip;
2956
2957                 f = &fields[i];
2958                 /* avoid seeing bits set from previous iterations */
2959                 s_mask = 0;
2960                 a_mask = 0;
2961
2962                 s_masks_p = (void *)set_masks + f->offset;
2963                 a_masks_p = (void *)add_masks + f->offset;
2964
2965                 s_mask = *s_masks_p & f->field_mask;
2966                 a_mask = *a_masks_p & f->field_mask;
2967
2968                 if (!s_mask && !a_mask) /* nothing to offload here */
2969                         continue;
2970
2971                 if (s_mask && a_mask) {
2972                         NL_SET_ERR_MSG_MOD(extack,
2973                                            "can't set and add to the same HW field");
2974                         printk(KERN_WARNING "mlx5: can't set and add to the same HW field (%x)\n", f->field);
2975                         return -EOPNOTSUPP;
2976                 }
2977
2978                 skip = false;
2979                 if (s_mask) {
2980                         void *match_mask = headers_c + f->match_offset;
2981                         void *match_val = headers_v + f->match_offset;
2982
2983                         cmd  = MLX5_ACTION_TYPE_SET;
2984                         mask = s_mask;
2985                         vals_p = (void *)set_vals + f->offset;
2986                         /* don't rewrite if we have a match on the same value */
2987                         if (cmp_val_mask(vals_p, s_masks_p, match_val,
2988                                          match_mask, f->field_bsize))
2989                                 skip = true;
2990                         /* clear to denote we consumed this field */
2991                         *s_masks_p &= ~f->field_mask;
2992                 } else {
2993                         cmd  = MLX5_ACTION_TYPE_ADD;
2994                         mask = a_mask;
2995                         vals_p = (void *)add_vals + f->offset;
2996                         /* add 0 is no change */
2997                         if ((*(u32 *)vals_p & f->field_mask) == 0)
2998                                 skip = true;
2999                         /* clear to denote we consumed this field */
3000                         *a_masks_p &= ~f->field_mask;
3001                 }
3002                 if (skip)
3003                         continue;
3004
3005                 mask = mask_to_le(mask, f->field_bsize);
3006
3007                 first = find_first_bit(&mask, f->field_bsize);
3008                 next_z = find_next_zero_bit(&mask, f->field_bsize, first);
3009                 last  = find_last_bit(&mask, f->field_bsize);
3010                 if (first < next_z && next_z < last) {
3011                         NL_SET_ERR_MSG_MOD(extack,
3012                                            "rewrite of few sub-fields isn't supported");
3013                         printk(KERN_WARNING "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n",
3014                                mask);
3015                         return -EOPNOTSUPP;
3016                 }
3017
3018                 err = alloc_mod_hdr_actions(priv->mdev, namespace, mod_acts);
3019                 if (err) {
3020                         NL_SET_ERR_MSG_MOD(extack,
3021                                            "too many pedit actions, can't offload");
3022                         mlx5_core_warn(priv->mdev,
3023                                        "mlx5: parsed %d pedit actions, can't do more\n",
3024                                        mod_acts->num_actions);
3025                         return err;
3026                 }
3027
3028                 action = mod_acts->actions +
3029                          (mod_acts->num_actions * action_size);
3030                 MLX5_SET(set_action_in, action, action_type, cmd);
3031                 MLX5_SET(set_action_in, action, field, f->field);
3032
3033                 if (cmd == MLX5_ACTION_TYPE_SET) {
3034                         int start;
3035
3036                         field_mask = mask_to_le(f->field_mask, f->field_bsize);
3037
3038                         /* a bit-sized field may not start at the first bit of the HW field */
3039                         start = find_first_bit(&field_mask, f->field_bsize);
3040
3041                         MLX5_SET(set_action_in, action, offset, first - start);
3042                         /* length is num of bits to be written, zero means length of 32 */
3043                         MLX5_SET(set_action_in, action, length, (last - first + 1));
3044                 }
3045
3046                 if (f->field_bsize == 32)
3047                         MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first);
3048                 else if (f->field_bsize == 16)
3049                         MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first);
3050                 else if (f->field_bsize == 8)
3051                         MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first);
3052
3053                 ++mod_acts->num_actions;
3054         }
3055
3056         return 0;
3057 }
3058
3059 static int mlx5e_flow_namespace_max_modify_action(struct mlx5_core_dev *mdev,
3060                                                   int namespace)
3061 {
3062         if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */
3063                 return MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, max_modify_header_actions);
3064         else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */
3065                 return MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_modify_header_actions);
3066 }
3067
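/* Grow the modify-header action array on demand: capacity is doubled
 * (starting from one action) up to the namespace's max_modify_header_actions
 * limit; -ENOSPC is returned once that limit is reached and the array is
 * still full.
 */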
3068 int alloc_mod_hdr_actions(struct mlx5_core_dev *mdev,
3069                           int namespace,
3070                           struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
3071 {
3072         int action_size, new_num_actions, max_hw_actions;
3073         size_t new_sz, old_sz;
3074         void *ret;
3075
3076         if (mod_hdr_acts->num_actions < mod_hdr_acts->max_actions)
3077                 return 0;
3078
3079         action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto);
3080
3081         max_hw_actions = mlx5e_flow_namespace_max_modify_action(mdev,
3082                                                                 namespace);
3083         new_num_actions = min(max_hw_actions,
3084                               mod_hdr_acts->actions ?
3085                               mod_hdr_acts->max_actions * 2 : 1);
3086         if (mod_hdr_acts->max_actions == new_num_actions)
3087                 return -ENOSPC;
3088
3089         new_sz = action_size * new_num_actions;
3090         old_sz = mod_hdr_acts->max_actions * action_size;
3091         ret = krealloc(mod_hdr_acts->actions, new_sz, GFP_KERNEL);
3092         if (!ret)
3093                 return -ENOMEM;
3094
3095         memset(ret + old_sz, 0, new_sz - old_sz);
3096         mod_hdr_acts->actions = ret;
3097         mod_hdr_acts->max_actions = new_num_actions;
3098
3099         return 0;
3100 }
3101
3102 void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
3103 {
3104         kfree(mod_hdr_acts->actions);
3105         mod_hdr_acts->actions = NULL;
3106         mod_hdr_acts->num_actions = 0;
3107         mod_hdr_acts->max_actions = 0;
3108 }
3109
3110 static const struct pedit_headers zero_masks = {};
3111
3112 static int
3113 parse_pedit_to_modify_hdr(struct mlx5e_priv *priv,
3114                           const struct flow_action_entry *act, int namespace,
3115                           struct mlx5e_tc_flow_parse_attr *parse_attr,
3116                           struct pedit_headers_action *hdrs,
3117                           struct netlink_ext_ack *extack)
3118 {
3119         u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1;
3120         int err = -EOPNOTSUPP; /* can't be all optimistic */
3121         u32 mask, val, offset;
3122         u8 htype;
3123
3124         htype = act->mangle.htype;
3126
3127         if (htype == FLOW_ACT_MANGLE_UNSPEC) {
3128                 NL_SET_ERR_MSG_MOD(extack, "legacy pedit isn't offloaded");
3129                 goto out_err;
3130         }
3131
3132         if (!mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace)) {
3133                 NL_SET_ERR_MSG_MOD(extack,
3134                                    "The pedit offload action is not supported");
3135                 goto out_err;
3136         }
3137
3138         mask = act->mangle.mask;
3139         val = act->mangle.val;
3140         offset = act->mangle.offset;
3141
3142         err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd]);
3143         if (err)
3144                 goto out_err;
3145
3146         hdrs[cmd].pedits++;
3147
3148         return 0;
3149 out_err:
3150         return err;
3151 }
3152
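/* For L3-to-L2 decap flows, Ethernet pedit values are not converted into
 * modify-header actions; they are accumulated into parse_attr->eth, which
 * mlx5e_attach_decap() later uses to build the L3_TUNNEL_TO_L2 reformat
 * header.
 */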
3153 static int
3154 parse_pedit_to_reformat(struct mlx5e_priv *priv,
3155                         const struct flow_action_entry *act,
3156                         struct mlx5e_tc_flow_parse_attr *parse_attr,
3157                         struct netlink_ext_ack *extack)
3158 {
3159         u32 mask, val, offset;
3160         u32 *p;
3161
3162         if (act->id != FLOW_ACTION_MANGLE)
3163                 return -EOPNOTSUPP;
3164
3165         if (act->mangle.htype != FLOW_ACT_MANGLE_HDR_TYPE_ETH) {
3166                 NL_SET_ERR_MSG_MOD(extack, "Only Ethernet modification is supported");
3167                 return -EOPNOTSUPP;
3168         }
3169
3170         mask = ~act->mangle.mask;
3171         val = act->mangle.val;
3172         offset = act->mangle.offset;
3173         p = (u32 *)&parse_attr->eth;
3174         *(p + (offset >> 2)) |= (val & mask);
3175
3176         return 0;
3177 }
3178
3179 static int parse_tc_pedit_action(struct mlx5e_priv *priv,
3180                                  const struct flow_action_entry *act, int namespace,
3181                                  struct mlx5e_tc_flow_parse_attr *parse_attr,
3182                                  struct pedit_headers_action *hdrs,
3183                                  struct mlx5e_tc_flow *flow,
3184                                  struct netlink_ext_ack *extack)
3185 {
3186         if (flow && flow_flag_test(flow, L3_TO_L2_DECAP))
3187                 return parse_pedit_to_reformat(priv, act, parse_attr, extack);
3188
3189         return parse_pedit_to_modify_hdr(priv, act, namespace,
3190                                          parse_attr, hdrs, extack);
3191 }
3192
3193 static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
3194                                  struct mlx5e_tc_flow_parse_attr *parse_attr,
3195                                  struct pedit_headers_action *hdrs,
3196                                  u32 *action_flags,
3197                                  struct netlink_ext_ack *extack)
3198 {
3199         struct pedit_headers *cmd_masks;
3200         int err;
3201         u8 cmd;
3202
3203         err = offload_pedit_fields(priv, namespace, hdrs, parse_attr,
3204                                    action_flags, extack);
3205         if (err < 0)
3206                 goto out_dealloc_parsed_actions;
3207
3208         for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
3209                 cmd_masks = &hdrs[cmd].masks;
3210                 if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
3211                         NL_SET_ERR_MSG_MOD(extack,
3212                                            "attempt to offload an unsupported field");
3213                         netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd);
3214                         print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
3215                                        16, 1, cmd_masks, sizeof(zero_masks), true);
3216                         err = -EOPNOTSUPP;
3217                         goto out_dealloc_parsed_actions;
3218                 }
3219         }
3220
3221         return 0;
3222
3223 out_dealloc_parsed_actions:
3224         dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
3225         return err;
3226 }
3227
3228 static bool csum_offload_supported(struct mlx5e_priv *priv,
3229                                    u32 action,
3230                                    u32 update_flags,
3231                                    struct netlink_ext_ack *extack)
3232 {
3233         u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP |
3234                          TCA_CSUM_UPDATE_FLAG_UDP;
3235
3236         /*  The HW recalcs checksums only if re-writing headers */
3237         if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) {
3238                 NL_SET_ERR_MSG_MOD(extack,
3239                                    "TC csum action is only offloaded with pedit");
3240                 netdev_warn(priv->netdev,
3241                             "TC csum action is only offloaded with pedit\n");
3242                 return false;
3243         }
3244
3245         if (update_flags & ~prot_flags) {
3246                 NL_SET_ERR_MSG_MOD(extack,
3247                                    "can't offload TC csum action for some header/s");
3248                 netdev_warn(priv->netdev,
3249                             "can't offload TC csum action for some header/s - flags %#x\n",
3250                             update_flags);
3251                 return false;
3252         }
3253
3254         return true;
3255 }
3256
3257 struct ip_ttl_word {
3258         __u8    ttl;
3259         __u8    protocol;
3260         __sum16 check;
3261 };
3262
3263 struct ipv6_hoplimit_word {
3264         __be16  payload_len;
3265         __u8    nexthdr;
3266         __u8    hop_limit;
3267 };
3268
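/* Inspect one mangle/add action: set *modify_ip_header if it rewrites IP
 * header fields other than ttl/hop_limit, and *modify_tuple if it rewrites
 * addresses or L4 ports; tuple rewrites cannot be offloaded together with
 * action ct.
 */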
3269 static int is_action_keys_supported(const struct flow_action_entry *act,
3270                                     bool ct_flow, bool *modify_ip_header,
3271                                     bool *modify_tuple,
3272                                     struct netlink_ext_ack *extack)
3273 {
3274         u32 mask, offset;
3275         u8 htype;
3276
3277         htype = act->mangle.htype;
3278         offset = act->mangle.offset;
3279         mask = ~act->mangle.mask;
3280         /* For the IPv4 and IPv6 headers, inspect the whole 4-byte word that
3281          * is being rewritten to determine whether fields other than
3282          * ttl/hop_limit are modified as well.
3283          */
3284         if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) {
3285                 struct ip_ttl_word *ttl_word =
3286                         (struct ip_ttl_word *)&mask;
3287
3288                 if (offset != offsetof(struct iphdr, ttl) ||
3289                     ttl_word->protocol ||
3290                     ttl_word->check) {
3291                         *modify_ip_header = true;
3292                 }
3293
3294                 if (offset >= offsetof(struct iphdr, saddr))
3295                         *modify_tuple = true;
3296
3297                 if (ct_flow && *modify_tuple) {
3298                         NL_SET_ERR_MSG_MOD(extack,
3299                                            "can't offload re-write of ipv4 address with action ct");
3300                         return -EOPNOTSUPP;
3301                 }
3302         } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) {
3303                 struct ipv6_hoplimit_word *hoplimit_word =
3304                         (struct ipv6_hoplimit_word *)&mask;
3305
3306                 if (offset != offsetof(struct ipv6hdr, payload_len) ||
3307                     hoplimit_word->payload_len ||
3308                     hoplimit_word->nexthdr) {
3309                         *modify_ip_header = true;
3310                 }
3311
3312                 if (ct_flow && offset >= offsetof(struct ipv6hdr, saddr))
3313                         *modify_tuple = true;
3314
3315                 if (ct_flow && *modify_tuple) {
3316                         NL_SET_ERR_MSG_MOD(extack,
3317                                            "can't offload re-write of ipv6 address with action ct");
3318                         return -EOPNOTSUPP;
3319                 }
3320         } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_TCP ||
3321                    htype == FLOW_ACT_MANGLE_HDR_TYPE_UDP) {
3322                 *modify_tuple = true;
3323                 if (ct_flow) {
3324                         NL_SET_ERR_MSG_MOD(extack,
3325                                            "can't offload re-write of transport header ports with action ct");
3326                         return -EOPNOTSUPP;
3327                 }
3328         }
3329
3330         return 0;
3331 }
3332
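/* Check whether the flow's header-rewrite actions can be offloaded given its
 * match criteria: tuple rewrites that conflict with conntrack matching are
 * refused, as are IP header rewrites for protocols other than TCP/UDP/ICMP.
 */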
3333 static bool modify_header_match_supported(struct mlx5e_priv *priv,
3334                                           struct mlx5_flow_spec *spec,
3335                                           struct flow_action *flow_action,
3336                                           u32 actions, bool ct_flow,
3337                                           bool ct_clear,
3338                                           struct netlink_ext_ack *extack)
3339 {
3340         const struct flow_action_entry *act;
3341         bool modify_ip_header, modify_tuple;
3342         void *headers_c;
3343         void *headers_v;
3344         u16 ethertype;
3345         u8 ip_proto;
3346         int i, err;
3347
3348         headers_c = get_match_headers_criteria(actions, spec);
3349         headers_v = get_match_headers_value(actions, spec);
3350         ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
3351
3352         /* for non-IP we only re-write MACs, so we're okay */
3353         if (MLX5_GET(fte_match_set_lyr_2_4, headers_c, ip_version) == 0 &&
3354             ethertype != ETH_P_IP && ethertype != ETH_P_IPV6)
3355                 goto out_ok;
3356
3357         modify_ip_header = false;
3358         modify_tuple = false;
3359         flow_action_for_each(i, act, flow_action) {
3360                 if (act->id != FLOW_ACTION_MANGLE &&
3361                     act->id != FLOW_ACTION_ADD)
3362                         continue;
3363
3364                 err = is_action_keys_supported(act, ct_flow,
3365                                                &modify_ip_header,
3366                                                &modify_tuple, extack);
3367                 if (err)
3368                         return false;
3369         }
3370
3371         /* Add a ct_state=-trk match so the rule is only offloaded for non-ct
3372          * flows (or after a ct clear action); otherwise, once the tuple has
3373          * been rewritten, the ct state can no longer be restored.
3374          */
3375         if (!ct_clear && modify_tuple &&
3376             mlx5_tc_ct_add_no_trk_match(spec)) {
3377                 NL_SET_ERR_MSG_MOD(extack,
3378                                    "can't offload tuple modify header with ct matches");
3379                 netdev_info(priv->netdev,
3380                             "can't offload tuple modify header with ct matches\n");
3381                 return false;
3382         }
3383
3384         ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
3385         if (modify_ip_header && ip_proto != IPPROTO_TCP &&
3386             ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) {
3387                 NL_SET_ERR_MSG_MOD(extack,
3388                                    "can't offload re-write of non TCP/UDP");
3389                 netdev_info(priv->netdev, "can't offload re-write of ip proto %d\n",
3390                             ip_proto);
3391                 return false;
3392         }
3393
3394 out_ok:
3395         return true;
3396 }
3397
3398 static bool actions_match_supported(struct mlx5e_priv *priv,
3399                                     struct flow_action *flow_action,
3400                                     struct mlx5e_tc_flow_parse_attr *parse_attr,
3401                                     struct mlx5e_tc_flow *flow,
3402                                     struct netlink_ext_ack *extack)
3403 {
3404         bool ct_flow = false, ct_clear = false;
3405         u32 actions;
3406
3407         ct_clear = flow->attr->ct_attr.ct_action &
3408                 TCA_CT_ACT_CLEAR;
3409         ct_flow = flow_flag_test(flow, CT) && !ct_clear;
3410         actions = flow->attr->action;
3411
3412         if (mlx5e_is_eswitch_flow(flow)) {
3413                 if (flow->attr->esw_attr->split_count && ct_flow) {
3414                         /* All registers used by ct are cleared when using
3415                          * split rules.
3416                          */
3417                         NL_SET_ERR_MSG_MOD(extack,
3418                                            "Can't offload mirroring with action ct");
3419                         return false;
3420                 }
3421         }
3422
3423         if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
3424                 return modify_header_match_supported(priv, &parse_attr->spec,
3425                                                      flow_action, actions,
3426                                                      ct_flow, ct_clear,
3427                                                      extack);
3428
3429         return true;
3430 }
3431
3432 static bool same_port_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
3433 {
3434         return priv->mdev == peer_priv->mdev;
3435 }
3436
3437 static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
3438 {
3439         struct mlx5_core_dev *fmdev, *pmdev;
3440         u64 fsystem_guid, psystem_guid;
3441
3442         fmdev = priv->mdev;
3443         pmdev = peer_priv->mdev;
3444
3445         fsystem_guid = mlx5_query_nic_system_image_guid(fmdev);
3446         psystem_guid = mlx5_query_nic_system_image_guid(pmdev);
3447
3448         return (fsystem_guid == psystem_guid);
3449 }
3450
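/* VLAN VID rewrite is implemented as a pedit of the Ethernet header TCI
 * field; it requires an existing cvlan match and does not allow changing the
 * VLAN priority bits.
 */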
3451 static int add_vlan_rewrite_action(struct mlx5e_priv *priv, int namespace,
3452                                    const struct flow_action_entry *act,
3453                                    struct mlx5e_tc_flow_parse_attr *parse_attr,
3454                                    struct pedit_headers_action *hdrs,
3455                                    u32 *action, struct netlink_ext_ack *extack)
3456 {
3457         u16 mask16 = VLAN_VID_MASK;
3458         u16 val16 = act->vlan.vid & VLAN_VID_MASK;
3459         const struct flow_action_entry pedit_act = {
3460                 .id = FLOW_ACTION_MANGLE,
3461                 .mangle.htype = FLOW_ACT_MANGLE_HDR_TYPE_ETH,
3462                 .mangle.offset = offsetof(struct vlan_ethhdr, h_vlan_TCI),
3463                 .mangle.mask = ~(u32)be16_to_cpu(*(__be16 *)&mask16),
3464                 .mangle.val = (u32)be16_to_cpu(*(__be16 *)&val16),
3465         };
3466         u8 match_prio_mask, match_prio_val;
3467         void *headers_c, *headers_v;
3468         int err;
3469
3470         headers_c = get_match_headers_criteria(*action, &parse_attr->spec);
3471         headers_v = get_match_headers_value(*action, &parse_attr->spec);
3472
3473         if (!(MLX5_GET(fte_match_set_lyr_2_4, headers_c, cvlan_tag) &&
3474               MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag))) {
3475                 NL_SET_ERR_MSG_MOD(extack,
3476                                    "VLAN rewrite action must have VLAN protocol match");
3477                 return -EOPNOTSUPP;
3478         }
3479
3480         match_prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
3481         match_prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
3482         if (act->vlan.prio != (match_prio_val & match_prio_mask)) {
3483                 NL_SET_ERR_MSG_MOD(extack,
3484                                    "Changing VLAN prio is not supported");
3485                 return -EOPNOTSUPP;
3486         }
3487
3488         err = parse_tc_pedit_action(priv, &pedit_act, namespace, parse_attr, hdrs, NULL, extack);
3489         *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3490
3491         return err;
3492 }
3493
3494 static int
3495 add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv,
3496                                  struct mlx5e_tc_flow_parse_attr *parse_attr,
3497                                  struct pedit_headers_action *hdrs,
3498                                  u32 *action, struct netlink_ext_ack *extack)
3499 {
3500         const struct flow_action_entry prio_tag_act = {
3501                 .vlan.vid = 0,
3502                 .vlan.prio =
3503                         MLX5_GET(fte_match_set_lyr_2_4,
3504                                  get_match_headers_value(*action,
3505                                                          &parse_attr->spec),
3506                                  first_prio) &
3507                         MLX5_GET(fte_match_set_lyr_2_4,
3508                                  get_match_headers_criteria(*action,
3509                                                             &parse_attr->spec),
3510                                  first_prio),
3511         };
3512
3513         return add_vlan_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB,
3514                                        &prio_tag_act, parse_attr, hdrs, action,
3515                                        extack);
3516 }
3517
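/* Validate a goto-chain action: FT flows may not use goto, the destination
 * chain must be within the supported range, going backwards to a lower
 * chain needs HW support, and combining goto with reformat/decap requires
 * the reformat_and_fwd_to_table capability.
 */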
3518 static int validate_goto_chain(struct mlx5e_priv *priv,
3519                                struct mlx5e_tc_flow *flow,
3520                                const struct flow_action_entry *act,
3521                                u32 actions,
3522                                struct netlink_ext_ack *extack)
3523 {
3524         bool is_esw = mlx5e_is_eswitch_flow(flow);
3525         struct mlx5_flow_attr *attr = flow->attr;
3526         bool ft_flow = mlx5e_is_ft_flow(flow);
3527         u32 dest_chain = act->chain_index;
3528         struct mlx5_fs_chains *chains;
3529         struct mlx5_eswitch *esw;
3530         u32 reformat_and_fwd;
3531         u32 max_chain;
3532
3533         esw = priv->mdev->priv.eswitch;
3534         chains = is_esw ? esw_chains(esw) : nic_chains(priv);
3535         max_chain = mlx5_chains_get_chain_range(chains);
3536         reformat_and_fwd = is_esw ?
3537                            MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_and_fwd_to_table) :
3538                            MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, reformat_and_fwd_to_table);
3539
3540         if (ft_flow) {
3541                 NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported");
3542                 return -EOPNOTSUPP;
3543         }
3544
3545         if (!mlx5_chains_backwards_supported(chains) &&
3546             dest_chain <= attr->chain) {
3547                 NL_SET_ERR_MSG_MOD(extack,
3548                                    "Goto lower numbered chain isn't supported");
3549                 return -EOPNOTSUPP;
3550         }
3551
3552         if (dest_chain > max_chain) {
3553                 NL_SET_ERR_MSG_MOD(extack,
3554                                    "Requested destination chain is out of supported range");
3555                 return -EOPNOTSUPP;
3556         }
3557
3558         if (actions & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
3559                        MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
3560             !reformat_and_fwd) {
3561                 NL_SET_ERR_MSG_MOD(extack,
3562                                    "Goto chain is not allowed if action has reformat or decap");
3563                 return -EOPNOTSUPP;
3564         }
3565
3566         return 0;
3567 }
3568
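/* Translate the TC actions of a NIC (non-eswitch) flow into
 * MLX5_FLOW_CONTEXT_ACTION_* flags and nic_attr fields; pedit actions are
 * accumulated first and converted into modify-header actions at the end.
 */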
3569 static int parse_tc_nic_actions(struct mlx5e_priv *priv,
3570                                 struct flow_action *flow_action,
3571                                 struct mlx5e_tc_flow_parse_attr *parse_attr,
3572                                 struct mlx5e_tc_flow *flow,
3573                                 struct netlink_ext_ack *extack)
3574 {
3575         struct mlx5_flow_attr *attr = flow->attr;
3576         struct pedit_headers_action hdrs[2] = {};
3577         const struct flow_action_entry *act;
3578         struct mlx5_nic_flow_attr *nic_attr;
3579         u32 action = 0;
3580         int err, i;
3581
3582         if (!flow_action_has_entries(flow_action))
3583                 return -EINVAL;
3584
3585         if (!flow_action_hw_stats_check(flow_action, extack,
3586                                         FLOW_ACTION_HW_STATS_DELAYED_BIT))
3587                 return -EOPNOTSUPP;
3588
3589         nic_attr = attr->nic_attr;
3590
3591         nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
3592
3593         flow_action_for_each(i, act, flow_action) {
3594                 switch (act->id) {
3595                 case FLOW_ACTION_ACCEPT:
3596                         action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
3597                                   MLX5_FLOW_CONTEXT_ACTION_COUNT;
3598                         break;
3599                 case FLOW_ACTION_DROP:
3600                         action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
3601                         if (MLX5_CAP_FLOWTABLE(priv->mdev,
3602                                                flow_table_properties_nic_receive.flow_counter))
3603                                 action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
3604                         break;
3605                 case FLOW_ACTION_MANGLE:
3606                 case FLOW_ACTION_ADD:
3607                         err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_KERNEL,
3608                                                     parse_attr, hdrs, NULL, extack);
3609                         if (err)
3610                                 return err;
3611
3612                         action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3613                         break;
3614                 case FLOW_ACTION_VLAN_MANGLE:
3615                         err = add_vlan_rewrite_action(priv,
3616                                                       MLX5_FLOW_NAMESPACE_KERNEL,
3617                                                       act, parse_attr, hdrs,
3618                                                       &action, extack);
3619                         if (err)
3620                                 return err;
3621
3622                         break;
3623                 case FLOW_ACTION_CSUM:
3624                         if (csum_offload_supported(priv, action,
3625                                                    act->csum_flags,
3626                                                    extack))
3627                                 break;
3628
3629                         return -EOPNOTSUPP;
3630                 case FLOW_ACTION_REDIRECT: {
3631                         struct net_device *peer_dev = act->dev;
3632
3633                         if (priv->netdev->netdev_ops == peer_dev->netdev_ops &&
3634                             same_hw_devs(priv, netdev_priv(peer_dev))) {
3635                                 parse_attr->mirred_ifindex[0] = peer_dev->ifindex;
3636                                 flow_flag_set(flow, HAIRPIN);
3637                                 action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
3638                                           MLX5_FLOW_CONTEXT_ACTION_COUNT;
3639                         } else {
3640                                 NL_SET_ERR_MSG_MOD(extack,
3641                                                    "device is not on same HW, can't offload");
3642                                 netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n",
3643                                             peer_dev->name);
3644                                 return -EINVAL;
3645                         }
3646                         }
3647                         break;
3648                 case FLOW_ACTION_MARK: {
3649                         u32 mark = act->mark;
3650
3651                         if (mark & ~MLX5E_TC_FLOW_ID_MASK) {
3652                                 NL_SET_ERR_MSG_MOD(extack,
3653                                                    "Bad flow mark - only 16 bit is supported");
3654                                 return -EINVAL;
3655                         }
3656
3657                         nic_attr->flow_tag = mark;
3658                         action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3659                         }
3660                         break;
3661                 case FLOW_ACTION_GOTO:
3662                         err = validate_goto_chain(priv, flow, act, action,
3663                                                   extack);
3664                         if (err)
3665                                 return err;
3666
3667                         action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
3668                         attr->dest_chain = act->chain_index;
3669                         break;
3670                 case FLOW_ACTION_CT:
3671                         err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr, act, extack);
3672                         if (err)
3673                                 return err;
3674
3675                         flow_flag_set(flow, CT);
3676                         break;
3677                 default:
3678                         NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
3679                         return -EOPNOTSUPP;
3680                 }
3681         }
3682
3683         if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits ||
3684             hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) {
3685                 err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_KERNEL,
3686                                             parse_attr, hdrs, &action, extack);
3687                 if (err)
3688                         return err;
3689                 /* in case all pedit actions are skipped, remove the MOD_HDR
3690                  * flag.
3691                  */
3692                 if (parse_attr->mod_hdr_acts.num_actions == 0) {
3693                         action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3694                         dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
3695                 }
3696         }
3697
3698         attr->action = action;
3699
3700         if (attr->dest_chain) {
3701                 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
3702                         NL_SET_ERR_MSG(extack, "Mirroring goto chain rules isn't supported");
3703                         return -EOPNOTSUPP;
3704                 }
3705                 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3706         }
3707
3708         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
3709                 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3710
3711         if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack))
3712                 return -EOPNOTSUPP;
3713
3714         return 0;
3715 }
3716
3717 struct encap_key {
3718         const struct ip_tunnel_key *ip_tun_key;
3719         struct mlx5e_tc_tunnel *tc_tunnel;
3720 };
3721
3722 static inline int cmp_encap_info(struct encap_key *a,
3723                                  struct encap_key *b)
3724 {
3725         return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) ||
3726                a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type;
3727 }
3728
3729 static inline int cmp_decap_info(struct mlx5e_decap_key *a,
3730                                  struct mlx5e_decap_key *b)
3731 {
3732         return memcmp(&a->key, &b->key, sizeof(b->key));
3733 }
3734
3735 static inline int hash_encap_info(struct encap_key *key)
3736 {
3737         return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
3738                      key->tc_tunnel->tunnel_type);
3739 }
3740
3741 static inline int hash_decap_info(struct mlx5e_decap_key *key)
3742 {
3743         return jhash(&key->key, sizeof(key->key), 0);
3744 }
3745
3746 static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv,
3747                                   struct net_device *peer_netdev)
3748 {
3749         struct mlx5e_priv *peer_priv;
3750
3751         peer_priv = netdev_priv(peer_netdev);
3752
3753         return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) &&
3754                 mlx5e_eswitch_vf_rep(priv->netdev) &&
3755                 mlx5e_eswitch_vf_rep(peer_netdev) &&
3756                 same_hw_devs(priv, peer_priv));
3757 }
3758
3759 bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
3760 {
3761         return refcount_inc_not_zero(&e->refcnt);
3762 }
3763
3764 static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
3765 {
3766         return refcount_inc_not_zero(&e->refcnt);
3767 }
3768
3769 static struct mlx5e_encap_entry *
3770 mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key,
3771                 uintptr_t hash_key)
3772 {
3773         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3774         struct mlx5e_encap_entry *e;
3775         struct encap_key e_key;
3776
3777         hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
3778                                    encap_hlist, hash_key) {
3779                 e_key.ip_tun_key = &e->tun_info->key;
3780                 e_key.tc_tunnel = e->tunnel;
3781                 if (!cmp_encap_info(&e_key, key) &&
3782                     mlx5e_encap_take(e))
3783                         return e;
3784         }
3785
3786         return NULL;
3787 }
3788
3789 static struct mlx5e_decap_entry *
3790 mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
3791                 uintptr_t hash_key)
3792 {
3793         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3794         struct mlx5e_decap_key r_key;
3795         struct mlx5e_decap_entry *e;
3796
3797         hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
3798                                    hlist, hash_key) {
3799                 r_key = e->key;
3800                 if (!cmp_decap_info(&r_key, key) &&
3801                     mlx5e_decap_take(e))
3802                         return e;
3803         }
3804         return NULL;
3805 }
3806
3807 static struct ip_tunnel_info *dup_tun_info(const struct ip_tunnel_info *tun_info)
3808 {
3809         size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
3810
3811         return kmemdup(tun_info, tun_size, GFP_KERNEL);
3812 }
3813
3814 static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
3815                                       struct mlx5e_tc_flow *flow,
3816                                       int out_index,
3817                                       struct mlx5e_encap_entry *e,
3818                                       struct netlink_ext_ack *extack)
3819 {
3820         int i;
3821
3822         for (i = 0; i < out_index; i++) {
3823                 if (flow->encaps[i].e != e)
3824                         continue;
3825                 NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
3826                 netdev_err(priv->netdev, "can't duplicate encap action\n");
3827                 return true;
3828         }
3829
3830         return false;
3831 }
3832
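/* If the encap route device is an mlx5 netdev representing a VF tunnel
 * endpoint, add a modify-header action that rewrites the VPORT_TO_REG
 * metadata to that VF's vport and mark the destination with
 * MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE.
 */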
3833 static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
3834                                struct mlx5_flow_attr *attr,
3835                                struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
3836                                struct net_device *out_dev,
3837                                int route_dev_ifindex,
3838                                int out_index)
3839 {
3840         struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
3841         struct net_device *route_dev;
3842         u16 vport_num;
3843         int err = 0;
3844         u32 data;
3845
3846         route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
3847
3848         if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
3849             !mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
3850                 goto out;
3851
3852         err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
3853         if (err)
3854                 goto out;
3855
3856         attr->dest_chain = 0;
3857         attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3858         esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
3859         data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
3860                                                        vport_num);
3861         err = mlx5e_tc_match_to_reg_set(esw->dev, mod_hdr_acts,
3862                                         MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG, data);
3865
3866 out:
3867         if (route_dev)
3868                 dev_put(route_dev);
3869         return err;
3870 }
3871
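/* Look up (or create) a shared encap entry for this flow's tunnel key and
 * attach the flow to it. The entry may exist but still be invalid, typically
 * while the neighbour is unresolved; *encap_valid tells the caller whether
 * the pkt_reformat can be used right away.
 */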
3872 static int mlx5e_attach_encap(struct mlx5e_priv *priv,
3873                               struct mlx5e_tc_flow *flow,
3874                               struct net_device *mirred_dev,
3875                               int out_index,
3876                               struct netlink_ext_ack *extack,
3877                               struct net_device **encap_dev,
3878                               bool *encap_valid)
3879 {
3880         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3881         struct mlx5e_tc_flow_parse_attr *parse_attr;
3882         struct mlx5_flow_attr *attr = flow->attr;
3883         const struct ip_tunnel_info *tun_info;
3884         struct encap_key key;
3885         struct mlx5e_encap_entry *e;
3886         unsigned short family;
3887         uintptr_t hash_key;
3888         int err = 0;
3889
3890         parse_attr = attr->parse_attr;
3891         tun_info = parse_attr->tun_info[out_index];
3892         family = ip_tunnel_info_af(tun_info);
3893         key.ip_tun_key = &tun_info->key;
3894         key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
3895         if (!key.tc_tunnel) {
3896                 NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
3897                 return -EOPNOTSUPP;
3898         }
3899
3900         hash_key = hash_encap_info(&key);
3901
3902         mutex_lock(&esw->offloads.encap_tbl_lock);
3903         e = mlx5e_encap_get(priv, &key, hash_key);
3904
3905         /* an existing entry may still be invalid; verify before reusing it */
3906         if (e) {
3907                 /* Check that entry was not already attached to this flow */
3908                 if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
3909                         err = -EOPNOTSUPP;
3910                         goto out_err;
3911                 }
3912
3913                 mutex_unlock(&esw->offloads.encap_tbl_lock);
3914                 wait_for_completion(&e->res_ready);
3915
3916                 /* Protect against concurrent neigh update. */
3917                 mutex_lock(&esw->offloads.encap_tbl_lock);
3918                 if (e->compl_result < 0) {
3919                         err = -EREMOTEIO;
3920                         goto out_err;
3921                 }
3922                 goto attach_flow;
3923         }
3924
3925         e = kzalloc(sizeof(*e), GFP_KERNEL);
3926         if (!e) {
3927                 err = -ENOMEM;
3928                 goto out_err;
3929         }
3930
3931         refcount_set(&e->refcnt, 1);
3932         init_completion(&e->res_ready);
3933
3934         tun_info = dup_tun_info(tun_info);
3935         if (!tun_info) {
3936                 err = -ENOMEM;
3937                 goto out_err_init;
3938         }
3939         e->tun_info = tun_info;
3940         err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
3941         if (err)
3942                 goto out_err_init;
3943
3944         INIT_LIST_HEAD(&e->flows);
3945         hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
3946         mutex_unlock(&esw->offloads.encap_tbl_lock);
3947
3948         if (family == AF_INET)
3949                 err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
3950         else if (family == AF_INET6)
3951                 err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
3952
3953         /* Protect against concurrent neigh update. */
3954         mutex_lock(&esw->offloads.encap_tbl_lock);
3955         complete_all(&e->res_ready);
3956         if (err) {
3957                 e->compl_result = err;
3958                 goto out_err;
3959         }
3960         e->compl_result = 1;
3961
3962 attach_flow:
3963         err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
3964                                   e->route_dev_ifindex, out_index);
3965         if (err)
3966                 goto out_err;
3967
3968         flow->encaps[out_index].e = e;
3969         list_add(&flow->encaps[out_index].list, &e->flows);
3970         flow->encaps[out_index].index = out_index;
3971         *encap_dev = e->out_dev;
3972         if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
3973                 attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
3974                 attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
3975                 *encap_valid = true;
3976         } else {
3977                 *encap_valid = false;
3978         }
3979         mutex_unlock(&esw->offloads.encap_tbl_lock);
3980
3981         return err;
3982
3983 out_err:
3984         mutex_unlock(&esw->offloads.encap_tbl_lock);
3985         if (e)
3986                 mlx5e_encap_put(priv, e);
3987         return err;
3988
3989 out_err_init:
3990         mutex_unlock(&esw->offloads.encap_tbl_lock);
3991         kfree(tun_info);
3992         kfree(e);
3993         return err;
3994 }
3995
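/* Look up (or allocate) a shared L3_TUNNEL_TO_L2 packet-reformat context
 * built from the Ethernet header accumulated in parse_attr->eth, and attach
 * it to the flow's decap attributes.
 */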
3996 static int mlx5e_attach_decap(struct mlx5e_priv *priv,
3997                               struct mlx5e_tc_flow *flow,
3998                               struct netlink_ext_ack *extack)
3999 {
4000         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4001         struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
4002         struct mlx5e_tc_flow_parse_attr *parse_attr;
4003         struct mlx5e_decap_entry *d;
4004         struct mlx5e_decap_key key;
4005         uintptr_t hash_key;
4006         int err = 0;
4007
4008         parse_attr = flow->attr->parse_attr;
4009         if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
4010                 NL_SET_ERR_MSG_MOD(extack,
4011                                    "encap header larger than max supported");
4012                 return -EOPNOTSUPP;
4013         }
4014
4015         key.key = parse_attr->eth;
4016         hash_key = hash_decap_info(&key);
4017         mutex_lock(&esw->offloads.decap_tbl_lock);
4018         d = mlx5e_decap_get(priv, &key, hash_key);
4019         if (d) {
4020                 mutex_unlock(&esw->offloads.decap_tbl_lock);
4021                 wait_for_completion(&d->res_ready);
4022                 mutex_lock(&esw->offloads.decap_tbl_lock);
4023                 if (d->compl_result) {
4024                         err = -EREMOTEIO;
4025                         goto out_free;
4026                 }
4027                 goto found;
4028         }
4029
4030         d = kzalloc(sizeof(*d), GFP_KERNEL);
4031         if (!d) {
4032                 err = -ENOMEM;
4033                 goto out_err;
4034         }
4035
4036         d->key = key;
4037         refcount_set(&d->refcnt, 1);
4038         init_completion(&d->res_ready);
4039         INIT_LIST_HEAD(&d->flows);
4040         hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
4041         mutex_unlock(&esw->offloads.decap_tbl_lock);
4042
4043         d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
4044                                                      MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2,
4045                                                      sizeof(parse_attr->eth),
4046                                                      &parse_attr->eth,
4047                                                      MLX5_FLOW_NAMESPACE_FDB);
4048         if (IS_ERR(d->pkt_reformat)) {
4049                 err = PTR_ERR(d->pkt_reformat);
4050                 d->compl_result = err;
4051         }
4052         mutex_lock(&esw->offloads.decap_tbl_lock);
4053         complete_all(&d->res_ready);
4054         if (err)
4055                 goto out_free;
4056
4057 found:
4058         flow->decap_reformat = d;
4059         attr->decap_pkt_reformat = d->pkt_reformat;
4060         list_add(&flow->l3_to_l2_reformat, &d->flows);
4061         mutex_unlock(&esw->offloads.decap_tbl_lock);
4062         return 0;
4063
4064 out_free:
4065         mutex_unlock(&esw->offloads.decap_tbl_lock);
4066         mlx5e_decap_put(priv, d);
4067         return err;
4068
4069 out_err:
4070         mutex_unlock(&esw->offloads.decap_tbl_lock);
4071         return err;
4072 }
4073
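     /* Translate a single tc vlan push/pop action into eswitch attr fields
      * and MLX5_FLOW_CONTEXT_ACTION_VLAN_* flags. Up to MLX5_FS_VLAN_DEPTH
      * vlan headers are supported; acting on a second header requires the
      * corresponding eswitch vlan-actions capability.
      */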
4074 static int parse_tc_vlan_action(struct mlx5e_priv *priv,
4075                                 const struct flow_action_entry *act,
4076                                 struct mlx5_esw_flow_attr *attr,
4077                                 u32 *action)
4078 {
4079         u8 vlan_idx = attr->total_vlan;
4080
4081         if (vlan_idx >= MLX5_FS_VLAN_DEPTH)
4082                 return -EOPNOTSUPP;
4083
4084         switch (act->id) {
4085         case FLOW_ACTION_VLAN_POP:
4086                 if (vlan_idx) {
4087                         if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
4088                                                                  MLX5_FS_VLAN_DEPTH))
4089                                 return -EOPNOTSUPP;
4090
4091                         *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2;
4092                 } else {
4093                         *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
4094                 }
4095                 break;
4096         case FLOW_ACTION_VLAN_PUSH:
4097                 attr->vlan_vid[vlan_idx] = act->vlan.vid;
4098                 attr->vlan_prio[vlan_idx] = act->vlan.prio;
4099                 attr->vlan_proto[vlan_idx] = act->vlan.proto;
4100                 if (!attr->vlan_proto[vlan_idx])
4101                         attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q);
4102
4103                 if (vlan_idx) {
4104                         if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
4105                                                                  MLX5_FS_VLAN_DEPTH))
4106                                 return -EOPNOTSUPP;
4107
4108                         *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
4109                 } else {
4110                         if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) &&
4111                             (act->vlan.proto != htons(ETH_P_8021Q) ||
4112                              act->vlan.prio))
4113                                 return -EOPNOTSUPP;
4114
4115                         *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
4116                 }
4117                 break;
4118         default:
4119                 return -EINVAL;
4120         }
4121
4122         attr->total_vlan = vlan_idx + 1;
4123
4124         return 0;
4125 }
4126
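     /* Map a mirred destination that may be a LAG master to an actual FDB
      * port: the bond on top of our uplink maps to the uplink itself, any
      * other bond maps to its active slave, but only if that slave is an
      * eswitch rep on the same switch; otherwise NULL is returned.
      */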
4127 static struct net_device *get_fdb_out_dev(struct net_device *uplink_dev,
4128                                           struct net_device *out_dev)
4129 {
4130         struct net_device *fdb_out_dev = out_dev;
4131         struct net_device *uplink_upper;
4132
4133         rcu_read_lock();
4134         uplink_upper = netdev_master_upper_dev_get_rcu(uplink_dev);
4135         if (uplink_upper && netif_is_lag_master(uplink_upper) &&
4136             uplink_upper == out_dev) {
4137                 fdb_out_dev = uplink_dev;
4138         } else if (netif_is_lag_master(out_dev)) {
4139                 fdb_out_dev = bond_option_active_slave_get_rcu(netdev_priv(out_dev));
4140                 if (fdb_out_dev &&
4141                     (!mlx5e_eswitch_rep(fdb_out_dev) ||
4142                      !netdev_port_same_parent_id(fdb_out_dev, uplink_dev)))
4143                         fdb_out_dev = NULL;
4144         }
4145         rcu_read_unlock();
4146         return fdb_out_dev;
4147 }
4148
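     /* The destination is a vlan device: add a vlan push action for it and
      * walk down through any stacked vlan devices, returning the real lower
      * device in *out_dev.
      */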
4149 static int add_vlan_push_action(struct mlx5e_priv *priv,
4150                                 struct mlx5_flow_attr *attr,
4151                                 struct net_device **out_dev,
4152                                 u32 *action)
4153 {
4154         struct net_device *vlan_dev = *out_dev;
4155         struct flow_action_entry vlan_act = {
4156                 .id = FLOW_ACTION_VLAN_PUSH,
4157                 .vlan.vid = vlan_dev_vlan_id(vlan_dev),
4158                 .vlan.proto = vlan_dev_vlan_proto(vlan_dev),
4159                 .vlan.prio = 0,
4160         };
4161         int err;
4162
4163         err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, action);
4164         if (err)
4165                 return err;
4166
4167         *out_dev = dev_get_by_index_rcu(dev_net(vlan_dev),
4168                                         dev_get_iflink(vlan_dev));
             if (!*out_dev)
                     return -ENODEV;
4169         if (is_vlan_dev(*out_dev))
4170                 err = add_vlan_push_action(priv, attr, out_dev, action);
4171
4172         return err;
4173 }
4174
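     /* Add one vlan pop action per vlan nesting level between the filter
      * device and the eswitch port netdev.
      */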
4175 static int add_vlan_pop_action(struct mlx5e_priv *priv,
4176                                struct mlx5_flow_attr *attr,
4177                                u32 *action)
4178 {
4179         struct flow_action_entry vlan_act = {
4180                 .id = FLOW_ACTION_VLAN_POP,
4181         };
4182         int nest_level, err = 0;
4183
4184         nest_level = attr->parse_attr->filter_dev->lower_level -
4185                                                 priv->netdev->lower_level;
4186         while (nest_level--) {
4187                 err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, action);
4188                 if (err)
4189                         return err;
4190         }
4191
4192         return err;
4193 }
4194
4195 static bool same_hw_reps(struct mlx5e_priv *priv,
4196                          struct net_device *peer_netdev)
4197 {
4198         struct mlx5e_priv *peer_priv;
4199
4200         peer_priv = netdev_priv(peer_netdev);
4201
4202         return mlx5e_eswitch_rep(priv->netdev) &&
4203                mlx5e_eswitch_rep(peer_netdev) &&
4204                same_hw_devs(priv, peer_priv);
4205 }
4206
4207 static bool is_lag_dev(struct mlx5e_priv *priv,
4208                        struct net_device *peer_netdev)
4209 {
4210         return ((mlx5_lag_is_sriov(priv->mdev) ||
4211                  mlx5_lag_is_multipath(priv->mdev)) &&
4212                  same_hw_reps(priv, peer_netdev));
4213 }
4214
4215 bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
4216                                     struct net_device *out_dev)
4217 {
4218         if (is_merged_eswitch_vfs(priv, out_dev))
4219                 return true;
4220
4221         if (is_lag_dev(priv, out_dev))
4222                 return true;
4223
4224         return mlx5e_eswitch_rep(out_dev) &&
4225                same_port_devs(priv, netdev_priv(out_dev));
4226 }
4227
4228 static bool is_duplicated_output_device(struct net_device *dev,
4229                                         struct net_device *out_dev,
4230                                         int *ifindexes, int if_count,
4231                                         struct netlink_ext_ack *extack)
4232 {
4233         int i;
4234
4235         for (i = 0; i < if_count; i++) {
4236                 if (ifindexes[i] == out_dev->ifindex) {
4237                         NL_SET_ERR_MSG_MOD(extack,
4238                                            "can't duplicate output to same device");
4239                         netdev_err(dev, "can't duplicate output to same device: %s\n",
4240                                    out_dev->name);
4241                         return true;
4242                 }
4243         }
4244
4245         return false;
4246 }
4247
4248 static int verify_uplink_forwarding(struct mlx5e_priv *priv,
4249                                     struct mlx5e_tc_flow *flow,
4250                                     struct net_device *out_dev,
4251                                     struct netlink_ext_ack *extack)
4252 {
4253         struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
4254         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4255         struct mlx5e_rep_priv *rep_priv;
4256
4257         /* Forwarding non-encapsulated traffic between
4258          * uplink ports is allowed only if the
4259          * termination_table_raw_traffic cap is set.
4260          *
4261          * The input vport was stored in attr->in_rep.
4262          * In the LAG case, *priv* is the private data of
4263          * the uplink, which may not be the input vport.
4264          */
4265         rep_priv = mlx5e_rep_to_rep_priv(attr->in_rep);
4266
4267         if (!(mlx5e_eswitch_uplink_rep(rep_priv->netdev) &&
4268               mlx5e_eswitch_uplink_rep(out_dev)))
4269                 return 0;
4270
4271         if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev,
4272                                         termination_table_raw_traffic)) {
4273                 NL_SET_ERR_MSG_MOD(extack,
4274                                    "devices are both uplink, can't offload forwarding");
4275                 pr_err("devices %s %s are both uplink, can't offload forwarding\n",
4276                        priv->netdev->name, out_dev->name);
4277                 return -EOPNOTSUPP;
4278         } else if (out_dev != rep_priv->netdev) {
4279                 NL_SET_ERR_MSG_MOD(extack,
4280                                    "devices are not the same uplink, can't offload forwarding");
4281                 pr_err("devices %s %s are both uplink but not the same, can't offload forwarding\n",
4282                        priv->netdev->name, out_dev->name);
4283                 return -EOPNOTSUPP;
4284         }
4285         return 0;
4286 }
4287
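     /* Parse the flower actions of an eswitch (FDB) flow into attr->action
      * flags, esw_attr destinations and pedit/vlan/CT state: drop, trap,
      * mpls push/pop, header rewrite, checksum, mirred redirect (optionally
      * with tunnel encap), vlan push/pop/mangle, tunnel decap, goto chain
      * and connection tracking.
      */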
4288 static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
4289                                 struct flow_action *flow_action,
4290                                 struct mlx5e_tc_flow *flow,
4291                                 struct netlink_ext_ack *extack,
4292                                 struct net_device *filter_dev)
4293 {
4294         struct pedit_headers_action hdrs[2] = {};
4295         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4296         struct mlx5e_tc_flow_parse_attr *parse_attr;
4297         struct mlx5e_rep_priv *rpriv = priv->ppriv;
4298         const struct ip_tunnel_info *info = NULL;
4299         struct mlx5_flow_attr *attr = flow->attr;
4300         int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS];
4301         bool ft_flow = mlx5e_is_ft_flow(flow);
4302         const struct flow_action_entry *act;
4303         struct mlx5_esw_flow_attr *esw_attr;
4304         bool encap = false, decap = false;
4305         u32 action = attr->action;
4306         int err, i, if_count = 0;
4307         bool mpls_push = false;
4308
4309         if (!flow_action_has_entries(flow_action))
4310                 return -EINVAL;
4311
4312         if (!flow_action_hw_stats_check(flow_action, extack,
4313                                         FLOW_ACTION_HW_STATS_DELAYED_BIT))
4314                 return -EOPNOTSUPP;
4315
4316         esw_attr = attr->esw_attr;
4317         parse_attr = attr->parse_attr;
4318
4319         flow_action_for_each(i, act, flow_action) {
4320                 switch (act->id) {
4321                 case FLOW_ACTION_DROP:
4322                         action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
4323                                   MLX5_FLOW_CONTEXT_ACTION_COUNT;
4324                         break;
4325                 case FLOW_ACTION_TRAP:
4326                         if (!flow_offload_has_one_action(flow_action)) {
4327                                 NL_SET_ERR_MSG_MOD(extack,
4328                                                    "action trap is supported as a sole action only");
4329                                 return -EOPNOTSUPP;
4330                         }
4331                         action |= (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
4332                                    MLX5_FLOW_CONTEXT_ACTION_COUNT);
4333                         attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
4334                         break;
4335                 case FLOW_ACTION_MPLS_PUSH:
4336                         if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
4337                                                         reformat_l2_to_l3_tunnel) ||
4338                             act->mpls_push.proto != htons(ETH_P_MPLS_UC)) {
4339                                 NL_SET_ERR_MSG_MOD(extack,
4340                                                    "mpls push is supported only for mpls_uc protocol");
4341                                 return -EOPNOTSUPP;
4342                         }
4343                         mpls_push = true;
4344                         break;
4345                 case FLOW_ACTION_MPLS_POP:
4346                         /* we only support mpls pop if it is the first action
4347                          * and the filter net device is bareudp. Subsequent
4348                          * actions can be pedit and the last can be mirred
4349                          * egress redirect.
4350                          */
4351                         if (i) {
4352                                 NL_SET_ERR_MSG_MOD(extack,
4353                                                    "mpls pop supported only as first action");
4354                                 return -EOPNOTSUPP;
4355                         }
4356                         if (!netif_is_bareudp(filter_dev)) {
4357                                 NL_SET_ERR_MSG_MOD(extack,
4358                                                    "mpls pop supported only on bareudp devices");
4359                                 return -EOPNOTSUPP;
4360                         }
4361
4362                         parse_attr->eth.h_proto = act->mpls_pop.proto;
4363                         action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
4364                         flow_flag_set(flow, L3_TO_L2_DECAP);
4365                         break;
4366                 case FLOW_ACTION_MANGLE:
4367                 case FLOW_ACTION_ADD:
4368                         err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_FDB,
4369                                                     parse_attr, hdrs, flow, extack);
4370                         if (err)
4371                                 return err;
4372
4373                         if (!flow_flag_test(flow, L3_TO_L2_DECAP)) {
4374                                 action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
4375                                 esw_attr->split_count = esw_attr->out_count;
4376                         }
4377                         break;
4378                 case FLOW_ACTION_CSUM:
4379                         if (csum_offload_supported(priv, action,
4380                                                    act->csum_flags, extack))
4381                                 break;
4382
4383                         return -EOPNOTSUPP;
4384                 case FLOW_ACTION_REDIRECT:
4385                 case FLOW_ACTION_MIRRED: {
4386                         struct mlx5e_priv *out_priv;
4387                         struct net_device *out_dev;
4388
4389                         out_dev = act->dev;
4390                         if (!out_dev) {
4391                                 /* out_dev is NULL when filters with
4392                                  * non-existing mirred device are replayed to
4393                                  * the driver.
4394                                  */
4395                                 return -EINVAL;
4396                         }
4397
4398                         if (mpls_push && !netif_is_bareudp(out_dev)) {
4399                                 NL_SET_ERR_MSG_MOD(extack,
4400                                                    "mpls is supported only through a bareudp device");
4401                                 return -EOPNOTSUPP;
4402                         }
4403
4404                         if (ft_flow && out_dev == priv->netdev) {
4405                                 /* Ignore forward to self rules generated
4406                                  * by adding both mlx5 devs to the flow table
4407                                  * block on a normal nft offload setup.
4408                                  */
4409                                 return -EOPNOTSUPP;
4410                         }
4411
4412                         if (esw_attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) {
4413                                 NL_SET_ERR_MSG_MOD(extack,
4414                                                    "can't support more output ports, can't offload forwarding");
4415                                 netdev_warn(priv->netdev,
4416                                             "can't support more than %d output ports, can't offload forwarding\n",
4417                                             esw_attr->out_count);
4418                                 return -EOPNOTSUPP;
4419                         }
4420
4421                         action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
4422                                   MLX5_FLOW_CONTEXT_ACTION_COUNT;
4423                         if (encap) {
4424                                 parse_attr->mirred_ifindex[esw_attr->out_count] =
4425                                         out_dev->ifindex;
4426                                 parse_attr->tun_info[esw_attr->out_count] = dup_tun_info(info);
4427                                 if (!parse_attr->tun_info[esw_attr->out_count])
4428                                         return -ENOMEM;
4429                                 encap = false;
4430                                 esw_attr->dests[esw_attr->out_count].flags |=
4431                                         MLX5_ESW_DEST_ENCAP;
4432                                 esw_attr->out_count++;
4433                                 /* attr->dests[].rep is resolved when we
4434                                  * handle encap
4435                                  */
4436                         } else if (netdev_port_same_parent_id(priv->netdev, out_dev)) {
4437                                 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4438                                 struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
4439
4440                                 if (is_duplicated_output_device(priv->netdev,
4441                                                                 out_dev,
4442                                                                 ifindexes,
4443                                                                 if_count,
4444                                                                 extack))
4445                                         return -EOPNOTSUPP;
4446
4447                                 ifindexes[if_count] = out_dev->ifindex;
4448                                 if_count++;
4449
4450                                 out_dev = get_fdb_out_dev(uplink_dev, out_dev);
4451                                 if (!out_dev)
4452                                         return -ENODEV;
4453
4454                                 if (is_vlan_dev(out_dev)) {
4455                                         err = add_vlan_push_action(priv, attr,
4456                                                                    &out_dev,
4457                                                                    &action);
4458                                         if (err)
4459                                                 return err;
4460                                 }
4461
4462                                 if (is_vlan_dev(parse_attr->filter_dev)) {
4463                                         err = add_vlan_pop_action(priv, attr,
4464                                                                   &action);
4465                                         if (err)
4466                                                 return err;
4467                                 }
4468
4469                                 err = verify_uplink_forwarding(priv, flow, out_dev, extack);
4470                                 if (err)
4471                                         return err;
4472
4473                                 if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) {
4474                                         NL_SET_ERR_MSG_MOD(extack,
4475                                                            "devices are not on same switch HW, can't offload forwarding");
4476                                         return -EOPNOTSUPP;
4477                                 }
4478
4479                                 out_priv = netdev_priv(out_dev);
4480                                 rpriv = out_priv->ppriv;
4481                                 esw_attr->dests[esw_attr->out_count].rep = rpriv->rep;
4482                                 esw_attr->dests[esw_attr->out_count].mdev = out_priv->mdev;
4483                                 esw_attr->out_count++;
4484                         } else if (parse_attr->filter_dev != priv->netdev) {
4485                                 /* All mlx5 devices are called to configure
4486                                  * high level device filters. Therefore, the
4487                                  * *attempt* to  install a filter on invalid
4488                                  * *attempt* to install a filter on an invalid
4489                                  * eswitch should not trigger an explicit error.
4490                                 return -EINVAL;
4491                         } else {
4492                                 NL_SET_ERR_MSG_MOD(extack,
4493                                                    "devices are not on same switch HW, can't offload forwarding");
4494                                 netdev_warn(priv->netdev,
4495                                             "devices %s %s not on same switch HW, can't offload forwarding\n",
4496                                             priv->netdev->name,
4497                                             out_dev->name);
4498                                 return -EINVAL;
4499                         }
4500                         }
4501                         break;
4502                 case FLOW_ACTION_TUNNEL_ENCAP:
4503                         info = act->tunnel;
4504                         if (info)
4505                                 encap = true;
4506                         else
4507                                 return -EOPNOTSUPP;
4508
4509                         break;
4510                 case FLOW_ACTION_VLAN_PUSH:
4511                 case FLOW_ACTION_VLAN_POP:
4512                         if (act->id == FLOW_ACTION_VLAN_PUSH &&
4513                             (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)) {
4514                                 /* Replace vlan pop+push with vlan modify */
4515                                 action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
4516                                 err = add_vlan_rewrite_action(priv,
4517                                                               MLX5_FLOW_NAMESPACE_FDB,
4518                                                               act, parse_attr, hdrs,
4519                                                               &action, extack);
4520                         } else {
4521                                 err = parse_tc_vlan_action(priv, act, esw_attr, &action);
4522                         }
4523                         if (err)
4524                                 return err;
4525
4526                         esw_attr->split_count = esw_attr->out_count;
4527                         break;
4528                 case FLOW_ACTION_VLAN_MANGLE:
4529                         err = add_vlan_rewrite_action(priv,
4530                                                       MLX5_FLOW_NAMESPACE_FDB,
4531                                                       act, parse_attr, hdrs,
4532                                                       &action, extack);
4533                         if (err)
4534                                 return err;
4535
4536                         esw_attr->split_count = esw_attr->out_count;
4537                         break;
4538                 case FLOW_ACTION_TUNNEL_DECAP:
4539                         decap = true;
4540                         break;
4541                 case FLOW_ACTION_GOTO:
4542                         err = validate_goto_chain(priv, flow, act, action,
4543                                                   extack);
4544                         if (err)
4545                                 return err;
4546
4547                         action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
4548                         attr->dest_chain = act->chain_index;
4549                         break;
4550                 case FLOW_ACTION_CT:
4551                         err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr, act, extack);
4552                         if (err)
4553                                 return err;
4554
4555                         flow_flag_set(flow, CT);
4556                         break;
4557                 default:
4558                         NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
4559                         return -EOPNOTSUPP;
4560                 }
4561         }
4562
4563         if (decap && esw_attr->rx_tun_attr) {
4564                 err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr);
4565                 if (err)
4566                         return err;
4567         }
4568
4569         /* always set IP version for indirect table handling */
4570         attr->ip_version = mlx5e_tc_get_ip_version(&parse_attr->spec, true);
4571
4572         if (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
4573             action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) {
4574                 /* For prio tag mode, replace vlan pop with a vlan prio
4575                  * tag rewrite.
4576                  */
4577                 action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
4578                 err = add_vlan_prio_tag_rewrite_action(priv, parse_attr, hdrs,
4579                                                        &action, extack);
4580                 if (err)
4581                         return err;
4582         }
4583
4584         if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits ||
4585             hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) {
4586                 err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_FDB,
4587                                             parse_attr, hdrs, &action, extack);
4588                 if (err)
4589                         return err;
4590                 /* In case all pedit actions are skipped, remove the MOD_HDR
4591                  * flag. We might have set split_count either by pedit or
4592                  * pop/push. If there is no pop/push either, reset it too.
4593                  */
4594                 if (parse_attr->mod_hdr_acts.num_actions == 0) {
4595                         action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
4596                         dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
4597                         if (!((action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
4598                               (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)))
4599                                 esw_attr->split_count = 0;
4600                 }
4601         }
4602
4603         attr->action = action;
4604         if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack))
4605                 return -EOPNOTSUPP;
4606
4607         if (attr->dest_chain) {
4608                 if (decap) {
4609                         /* It could be supported if we created a mapping for
4610                          * the tunnel device only (without the tunnel), and set
4611                          * that tunnel id on this decap flow.
4612                          *
4613                          * On restore (miss), we'd just set this saved tunnel
4614                          * device.
4615                          */
4616
4617                         NL_SET_ERR_MSG(extack,
4618                                        "Decap with goto isn't supported");
4619                         netdev_warn(priv->netdev,
4620                                     "Decap with goto isn't supported");
4621                         return -EOPNOTSUPP;
4622                 }
4623
4624                 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
4625                         NL_SET_ERR_MSG_MOD(extack,
4626                                            "Mirroring goto chain rules isn't supported");
4627                         return -EOPNOTSUPP;
4628                 }
4629                 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
4630         }
4631
4632         if (!(attr->action &
4633               (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
4634                 NL_SET_ERR_MSG_MOD(extack,
4635                                    "Rule must have at least one forward/drop action");
4636                 return -EOPNOTSUPP;
4637         }
4638
4639         if (esw_attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
4640                 NL_SET_ERR_MSG_MOD(extack,
4641                                    "current firmware doesn't support split rule for port mirroring");
4642                 netdev_warn_once(priv->netdev, "current firmware doesn't support split rule for port mirroring\n");
4643                 return -EOPNOTSUPP;
4644         }
4645
4646         return 0;
4647 }
4648
4649 static void get_flags(int flags, unsigned long *flow_flags)
4650 {
4651         unsigned long __flow_flags = 0;
4652
4653         if (flags & MLX5_TC_FLAG(INGRESS))
4654                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS);
4655         if (flags & MLX5_TC_FLAG(EGRESS))
4656                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS);
4657
4658         if (flags & MLX5_TC_FLAG(ESW_OFFLOAD))
4659                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
4660         if (flags & MLX5_TC_FLAG(NIC_OFFLOAD))
4661                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
4662         if (flags & MLX5_TC_FLAG(FT_OFFLOAD))
4663                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_FT);
4664
4665         *flow_flags = __flow_flags;
4666 }
4667
4668 static const struct rhashtable_params tc_ht_params = {
4669         .head_offset = offsetof(struct mlx5e_tc_flow, node),
4670         .key_offset = offsetof(struct mlx5e_tc_flow, cookie),
4671         .key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie),
4672         .automatic_shrinking = true,
4673 };
4674
4675 static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv,
4676                                     unsigned long flags)
4677 {
4678         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4679         struct mlx5e_rep_priv *uplink_rpriv;
4680
4681         if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) {
4682                 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
4683                 return &uplink_rpriv->uplink_priv.tc_ht;
4684         } else /* NIC offload */
4685                 return &priv->fs.tc.ht;
4686 }
4687
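     /* A rule needs to be duplicated on the peer eswitch when the eswitches
      * are paired (SR-IOV or multipath LAG) and the rule either ingresses on
      * a non-uplink (VF) rep or performs packet reformat (encap).
      */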
4688 static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
4689 {
4690         struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
4691         struct mlx5_flow_attr *attr = flow->attr;
4692         bool is_rep_ingress = esw_attr->in_rep->vport != MLX5_VPORT_UPLINK &&
4693                 flow_flag_test(flow, INGRESS);
4694         bool act_is_encap = !!(attr->action &
4695                                MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT);
4696         bool esw_paired = mlx5_devcom_is_paired(esw_attr->in_mdev->priv.devcom,
4697                                                 MLX5_DEVCOM_ESW_OFFLOADS);
4698
4699         if (!esw_paired)
4700                 return false;
4701
4702         if ((mlx5_lag_is_sriov(esw_attr->in_mdev) ||
4703              mlx5_lag_is_multipath(esw_attr->in_mdev)) &&
4704             (is_rep_ingress || act_is_encap))
4705                 return true;
4706
4707         return false;
4708 }
4709
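     /* Allocate a struct mlx5_flow_attr with the namespace-specific part
      * (eswitch or nic flow attr) laid out right after it.
      */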
4710 struct mlx5_flow_attr *
4711 mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type)
4712 {
4713         u32 ex_attr_size = (type == MLX5_FLOW_NAMESPACE_FDB)  ?
4714                                 sizeof(struct mlx5_esw_flow_attr) :
4715                                 sizeof(struct mlx5_nic_flow_attr);
4716         struct mlx5_flow_attr *attr;
4717
4718         return kzalloc(sizeof(*attr) + ex_attr_size, GFP_KERNEL);
4719 }
4720
4721 static int
4722 mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
4723                  struct flow_cls_offload *f, unsigned long flow_flags,
4724                  struct mlx5e_tc_flow_parse_attr **__parse_attr,
4725                  struct mlx5e_tc_flow **__flow)
4726 {
4727         struct mlx5e_tc_flow_parse_attr *parse_attr;
4728         struct mlx5_flow_attr *attr;
4729         struct mlx5e_tc_flow *flow;
4730         int err = -ENOMEM;
4731         int out_index;
4732
4733         flow = kzalloc(sizeof(*flow), GFP_KERNEL);
4734         parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
4735         if (!parse_attr || !flow)
4736                 goto err_free;
4737
4738         flow->flags = flow_flags;
4739         flow->cookie = f->cookie;
4740         flow->priv = priv;
4741
4742         attr = mlx5_alloc_flow_attr(get_flow_name_space(flow));
4743         if (!attr)
4744                 goto err_free;
4745
4746         flow->attr = attr;
4747
4748         for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
4749                 INIT_LIST_HEAD(&flow->encaps[out_index].list);
4750         INIT_LIST_HEAD(&flow->hairpin);
4751         INIT_LIST_HEAD(&flow->l3_to_l2_reformat);
4752         refcount_set(&flow->refcnt, 1);
4753         init_completion(&flow->init_done);
4754
4755         *__flow = flow;
4756         *__parse_attr = parse_attr;
4757
4758         return 0;
4759
4760 err_free:
4761         kfree(flow);
4762         kvfree(parse_attr);
4763         return err;
4764 }
4765
4766 static void
4767 mlx5e_flow_attr_init(struct mlx5_flow_attr *attr,
4768                      struct mlx5e_tc_flow_parse_attr *parse_attr,
4769                      struct flow_cls_offload *f)
4770 {
4771         attr->parse_attr = parse_attr;
4772         attr->chain = f->common.chain_index;
4773         attr->prio = f->common.prio;
4774 }
4775
4776 static void
4777 mlx5e_flow_esw_attr_init(struct mlx5_flow_attr *attr,
4778                          struct mlx5e_priv *priv,
4779                          struct mlx5e_tc_flow_parse_attr *parse_attr,
4780                          struct flow_cls_offload *f,
4781                          struct mlx5_eswitch_rep *in_rep,
4782                          struct mlx5_core_dev *in_mdev)
4783 {
4784         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4785         struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
4786
4787         mlx5e_flow_attr_init(attr, parse_attr, f);
4788
4789         esw_attr->in_rep = in_rep;
4790         esw_attr->in_mdev = in_mdev;
4791
4792         if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) ==
4793             MLX5_COUNTER_SOURCE_ESWITCH)
4794                 esw_attr->counter_dev = in_mdev;
4795         else
4796                 esw_attr->counter_dev = priv->mdev;
4797 }
4798
4799 static struct mlx5e_tc_flow *
4800 __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
4801                      struct flow_cls_offload *f,
4802                      unsigned long flow_flags,
4803                      struct net_device *filter_dev,
4804                      struct mlx5_eswitch_rep *in_rep,
4805                      struct mlx5_core_dev *in_mdev)
4806 {
4807         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
4808         struct netlink_ext_ack *extack = f->common.extack;
4809         struct mlx5e_tc_flow_parse_attr *parse_attr;
4810         struct mlx5e_tc_flow *flow;
4811         int attr_size, err;
4812
4813         flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
4814         attr_size  = sizeof(struct mlx5_esw_flow_attr);
4815         err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
4816                                &parse_attr, &flow);
4817         if (err)
4818                 goto out;
4819
4820         parse_attr->filter_dev = filter_dev;
4821         mlx5e_flow_esw_attr_init(flow->attr,
4822                                  priv, parse_attr,
4823                                  f, in_rep, in_mdev);
4824
4825         err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
4826                                f, filter_dev);
4827         if (err)
4828                 goto err_free;
4829
4830         /* actions validation depends on parsing the ct matches first */
4831         err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
4832                                    &flow->attr->ct_attr, extack);
4833         if (err)
4834                 goto err_free;
4835
4836         err = parse_tc_fdb_actions(priv, &rule->action, flow, extack, filter_dev);
4837         if (err)
4838                 goto err_free;
4839
4840         err = mlx5e_tc_add_fdb_flow(priv, flow, extack);
4841         complete_all(&flow->init_done);
4842         if (err) {
4843                 if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev)))
4844                         goto err_free;
4845
4846                 add_unready_flow(flow);
4847         }
4848
4849         return flow;
4850
4851 err_free:
4852         dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
4853         mlx5e_flow_put(priv, flow);
4854 out:
4855         return ERR_PTR(err);
4856 }
4857
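     /* Offload a copy of the rule on the paired eswitch so traffic arriving
      * via the peer port is handled as well; the two flows are linked so
      * their stats are summed and teardown covers both.
      */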
4858 static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
4859                                       struct mlx5e_tc_flow *flow,
4860                                       unsigned long flow_flags)
4861 {
4862         struct mlx5e_priv *priv = flow->priv, *peer_priv;
4863         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
4864         struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
4865         struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
4866         struct mlx5e_tc_flow_parse_attr *parse_attr;
4867         struct mlx5e_rep_priv *peer_urpriv;
4868         struct mlx5e_tc_flow *peer_flow;
4869         struct mlx5_core_dev *in_mdev;
4870         int err = 0;
4871
4872         peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4873         if (!peer_esw)
4874                 return -ENODEV;
4875
4876         peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH);
4877         peer_priv = netdev_priv(peer_urpriv->netdev);
4878
4879         /* in_mdev is assigned the mdev from which the packet originated.
4880          * Packets redirected to the uplink use the same mdev as the
4881          * original flow, and packets redirected from the uplink use the
4882          * peer mdev.
4883          */
4884         if (attr->in_rep->vport == MLX5_VPORT_UPLINK)
4885                 in_mdev = peer_priv->mdev;
4886         else
4887                 in_mdev = priv->mdev;
4888
4889         parse_attr = flow->attr->parse_attr;
4890         peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags,
4891                                          parse_attr->filter_dev,
4892                                          attr->in_rep, in_mdev);
4893         if (IS_ERR(peer_flow)) {
4894                 err = PTR_ERR(peer_flow);
4895                 goto out;
4896         }
4897
4898         flow->peer_flow = peer_flow;
4899         flow_flag_set(flow, DUP);
4900         mutex_lock(&esw->offloads.peer_mutex);
4901         list_add_tail(&flow->peer, &esw->offloads.peer_flows);
4902         mutex_unlock(&esw->offloads.peer_mutex);
4903
4904 out:
4905         mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4906         return err;
4907 }
4908
4909 static int
4910 mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
4911                    struct flow_cls_offload *f,
4912                    unsigned long flow_flags,
4913                    struct net_device *filter_dev,
4914                    struct mlx5e_tc_flow **__flow)
4915 {
4916         struct mlx5e_rep_priv *rpriv = priv->ppriv;
4917         struct mlx5_eswitch_rep *in_rep = rpriv->rep;
4918         struct mlx5_core_dev *in_mdev = priv->mdev;
4919         struct mlx5e_tc_flow *flow;
4920         int err;
4921
4922         flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep,
4923                                     in_mdev);
4924         if (IS_ERR(flow))
4925                 return PTR_ERR(flow);
4926
4927         if (is_peer_flow_needed(flow)) {
4928                 err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags);
4929                 if (err) {
4930                         mlx5e_tc_del_fdb_flow(priv, flow);
4931                         goto out;
4932                 }
4933         }
4934
4935         *__flow = flow;
4936
4937         return 0;
4938
4939 out:
4940         return err;
4941 }
4942
4943 static int
4944 mlx5e_add_nic_flow(struct mlx5e_priv *priv,
4945                    struct flow_cls_offload *f,
4946                    unsigned long flow_flags,
4947                    struct net_device *filter_dev,
4948                    struct mlx5e_tc_flow **__flow)
4949 {
4950         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
4951         struct netlink_ext_ack *extack = f->common.extack;
4952         struct mlx5e_tc_flow_parse_attr *parse_attr;
4953         struct mlx5e_tc_flow *flow;
4954         int attr_size, err;
4955
4956         if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
4957                 if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common))
4958                         return -EOPNOTSUPP;
4959         } else if (!tc_can_offload_extack(priv->netdev, f->common.extack)) {
4960                 return -EOPNOTSUPP;
4961         }
4962
4963         flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
4964         attr_size  = sizeof(struct mlx5_nic_flow_attr);
4965         err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
4966                                &parse_attr, &flow);
4967         if (err)
4968                 goto out;
4969
4970         parse_attr->filter_dev = filter_dev;
4971         mlx5e_flow_attr_init(flow->attr, parse_attr, f);
4972
4973         err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
4974                                f, filter_dev);
4975         if (err)
4976                 goto err_free;
4977
4978         err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
4979                                    &flow->attr->ct_attr, extack);
4980         if (err)
4981                 goto err_free;
4982
4983         err = parse_tc_nic_actions(priv, &rule->action, parse_attr, flow, extack);
4984         if (err)
4985                 goto err_free;
4986
4987         err = mlx5e_tc_add_nic_flow(priv, parse_attr, flow, extack);
4988         if (err)
4989                 goto err_free;
4990
4991         flow_flag_set(flow, OFFLOADED);
4992         *__flow = flow;
4993
4994         return 0;
4995
4996 err_free:
4997         dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
4998         mlx5e_flow_put(priv, flow);
4999 out:
5000         return err;
5001 }
5002
5003 static int
5004 mlx5e_tc_add_flow(struct mlx5e_priv *priv,
5005                   struct flow_cls_offload *f,
5006                   unsigned long flags,
5007                   struct net_device *filter_dev,
5008                   struct mlx5e_tc_flow **flow)
5009 {
5010         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
5011         unsigned long flow_flags;
5012         int err;
5013
5014         get_flags(flags, &flow_flags);
5015
5016         if (!tc_can_offload_extack(priv->netdev, f->common.extack))
5017                 return -EOPNOTSUPP;
5018
5019         if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
5020                 err = mlx5e_add_fdb_flow(priv, f, flow_flags,
5021                                          filter_dev, flow);
5022         else
5023                 err = mlx5e_add_nic_flow(priv, f, flow_flags,
5024                                          filter_dev, flow);
5025
5026         return err;
5027 }
5028
5029 static bool is_flow_rule_duplicate_allowed(struct net_device *dev,
5030                                            struct mlx5e_rep_priv *rpriv)
5031 {
5032         /* An offloaded flow rule is allowed to be duplicated on a non-uplink
5033          * representor sharing a tc block with other slaves of a lag device.
5034          * rpriv can be NULL if this function is called from NIC mode.
5035          */
5036         return netif_is_lag_port(dev) && rpriv && rpriv->rep->vport != MLX5_VPORT_UPLINK;
5037 }
5038
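     /* Offload a flower rule: tolerate duplicates owned by another lag port
      * sharing the tc block (see above), reject other duplicate cookies,
      * then build, install and hash the flow.
      */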
5039 int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
5040                            struct flow_cls_offload *f, unsigned long flags)
5041 {
5042         struct netlink_ext_ack *extack = f->common.extack;
5043         struct rhashtable *tc_ht = get_tc_ht(priv, flags);
5044         struct mlx5e_rep_priv *rpriv = priv->ppriv;
5045         struct mlx5e_tc_flow *flow;
5046         int err = 0;
5047
5048         rcu_read_lock();
5049         flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
5050         if (flow) {
5051                 /* Same flow rule offloaded to non-uplink representor sharing tc block,
5052                  * just return 0.
5053                  */
5054                 if (is_flow_rule_duplicate_allowed(dev, rpriv) && flow->orig_dev != dev)
5055                         goto rcu_unlock;
5056
5057                 NL_SET_ERR_MSG_MOD(extack,
5058                                    "flow cookie already exists, ignoring");
5059                 netdev_warn_once(priv->netdev,
5060                                  "flow cookie %lx already exists, ignoring\n",
5061                                  f->cookie);
5062                 err = -EEXIST;
5063                 goto rcu_unlock;
5064         }
5065 rcu_unlock:
5066         rcu_read_unlock();
5067         if (flow)
5068                 goto out;
5069
5070         trace_mlx5e_configure_flower(f);
5071         err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow);
5072         if (err)
5073                 goto out;
5074
5075         /* Flow rule offloaded to non-uplink representor sharing tc block,
5076          * set the flow's owner dev.
5077          */
5078         if (is_flow_rule_duplicate_allowed(dev, rpriv))
5079                 flow->orig_dev = dev;
5080
5081         err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params);
5082         if (err)
5083                 goto err_free;
5084
5085         return 0;
5086
5087 err_free:
5088         mlx5e_flow_put(priv, flow);
5089 out:
5090         return err;
5091 }
5092
5093 static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags)
5094 {
5095         bool dir_ingress = !!(flags & MLX5_TC_FLAG(INGRESS));
5096         bool dir_egress = !!(flags & MLX5_TC_FLAG(EGRESS));
5097
5098         return flow_flag_test(flow, INGRESS) == dir_ingress &&
5099                 flow_flag_test(flow, EGRESS) == dir_egress;
5100 }
5101
5102 int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
5103                         struct flow_cls_offload *f, unsigned long flags)
5104 {
5105         struct rhashtable *tc_ht = get_tc_ht(priv, flags);
5106         struct mlx5e_tc_flow *flow;
5107         int err;
5108
5109         rcu_read_lock();
5110         flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
5111         if (!flow || !same_flow_direction(flow, flags)) {
5112                 err = -EINVAL;
5113                 goto errout;
5114         }
5115
5116         /* Only delete the flow if it doesn't have the MLX5E_TC_FLOW_DELETED
5117          * flag set.
5118          */
5119         if (flow_flag_test_and_set(flow, DELETED)) {
5120                 err = -EINVAL;
5121                 goto errout;
5122         }
5123         rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params);
5124         rcu_read_unlock();
5125
5126         trace_mlx5e_delete_flower(f);
5127         mlx5e_flow_put(priv, flow);
5128
5129         return 0;
5130
5131 errout:
5132         rcu_read_unlock();
5133         return err;
5134 }
5135
5136 int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
5137                        struct flow_cls_offload *f, unsigned long flags)
5138 {
5139         struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
5140         struct rhashtable *tc_ht = get_tc_ht(priv, flags);
5141         struct mlx5_eswitch *peer_esw;
5142         struct mlx5e_tc_flow *flow;
5143         struct mlx5_fc *counter;
5144         u64 lastuse = 0;
5145         u64 packets = 0;
5146         u64 bytes = 0;
5147         int err = 0;
5148
5149         rcu_read_lock();
5150         flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie,
5151                                                 tc_ht_params));
5152         rcu_read_unlock();
5153         if (IS_ERR(flow))
5154                 return PTR_ERR(flow);
5155
5156         if (!same_flow_direction(flow, flags)) {
5157                 err = -EINVAL;
5158                 goto errout;
5159         }
5160
5161         if (mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, CT)) {
5162                 counter = mlx5e_tc_get_counter(flow);
5163                 if (!counter)
5164                         goto errout;
5165
5166                 mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
5167         }
5168
5169         /* Under multipath it's possible for one rule to be currently
5170          * un-offloaded while the other rule is offloaded.
5171          */
5172         peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
5173         if (!peer_esw)
5174                 goto out;
5175
5176         if (flow_flag_test(flow, DUP) &&
5177             flow_flag_test(flow->peer_flow, OFFLOADED)) {
5178                 u64 bytes2;
5179                 u64 packets2;
5180                 u64 lastuse2;
5181
5182                 counter = mlx5e_tc_get_counter(flow->peer_flow);
5183                 if (!counter)
5184                         goto no_peer_counter;
5185                 mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2);
5186
5187                 bytes += bytes2;
5188                 packets += packets2;
5189                 lastuse = max_t(u64, lastuse, lastuse2);
5190         }
5191
5192 no_peer_counter:
5193         mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
5194 out:
5195         flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
5196                           FLOW_ACTION_HW_STATS_DELAYED);
5197         trace_mlx5e_stats_flower(f);
5198 errout:
5199         mlx5e_flow_put(priv, flow);
5200         return err;
5201 }
5202
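     /* Translate a matchall police rate, given in bytes/sec, into a vport
      * ingress rate limit in mbit/sec. Only vports of VFs are supported;
      * a rate of 0 clears the limit.
      */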
5203 static int apply_police_params(struct mlx5e_priv *priv, u64 rate,
5204                                struct netlink_ext_ack *extack)
5205 {
5206         struct mlx5e_rep_priv *rpriv = priv->ppriv;
5207         struct mlx5_eswitch *esw;
5208         u32 rate_mbps = 0;
5209         u16 vport_num;
5210         int err;
5211
5212         vport_num = rpriv->rep->vport;
5213         if (vport_num >= MLX5_VPORT_ECPF) {
5214                 NL_SET_ERR_MSG_MOD(extack,
5215                                    "Ingress rate limit is supported only for Eswitch ports connected to VFs");
5216                 return -EOPNOTSUPP;
5217         }
5218
5219         esw = priv->mdev->priv.eswitch;
5220         /* rate is given in bytes/sec.
5221          * First convert to bits/sec and then round to the nearest mbit/sec
5222          * (mbit means million bits).
5223          * Moreover, if rate is non-zero we choose to configure a minimum of
5224          * 1 mbit/sec.
5225          */
5226         if (rate) {
5227                 rate = (rate * BITS_PER_BYTE) + 500000;
5228                 rate_mbps = max_t(u32, div_u64(rate, 1000000), 1);
5229         }
5230
5231         err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps);
5232         if (err)
5233                 NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware");
5234
5235         return err;
5236 }
5237
5238 static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
5239                                         struct flow_action *flow_action,
5240                                         struct netlink_ext_ack *extack)
5241 {
5242         struct mlx5e_rep_priv *rpriv = priv->ppriv;
5243         const struct flow_action_entry *act;
5244         int err;
5245         int i;
5246
5247         if (!flow_action_has_entries(flow_action)) {
5248                 NL_SET_ERR_MSG_MOD(extack, "matchall called with no action");
5249                 return -EINVAL;
5250         }
5251
5252         if (!flow_offload_has_one_action(flow_action)) {
5253                 NL_SET_ERR_MSG_MOD(extack, "matchall policing support only a single action");
5254                 return -EOPNOTSUPP;
5255         }
5256
5257         if (!flow_action_basic_hw_stats_check(flow_action, extack))
5258                 return -EOPNOTSUPP;
5259
5260         flow_action_for_each(i, act, flow_action) {
5261                 switch (act->id) {
5262                 case FLOW_ACTION_POLICE:
5263                         err = apply_police_params(priv, act->police.rate_bytes_ps, extack);
5264                         if (err)
5265                                 return err;
5266
5267                         rpriv->prev_vf_vport_stats = priv->stats.vf_vport;
5268                         break;
5269                 default:
5270                         NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall");
5271                         return -EOPNOTSUPP;
5272                 }
5273         }
5274
5275         return 0;
5276 }
5277
5278 int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv,
5279                                 struct tc_cls_matchall_offload *ma)
5280 {
5281         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
5282         struct netlink_ext_ack *extack = ma->common.extack;
5283
5284         if (!mlx5_esw_qos_enabled(esw)) {
5285                 NL_SET_ERR_MSG_MOD(extack, "QoS is not supported on this device");
5286                 return -EOPNOTSUPP;
5287         }
5288
5289         if (ma->common.prio != 1) {
5290                 NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported");
5291                 return -EINVAL;
5292         }
5293
5294         return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack);
5295 }
5296
5297 int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv,
5298                              struct tc_cls_matchall_offload *ma)
5299 {
5300         struct netlink_ext_ack *extack = ma->common.extack;
5301
5302         return apply_police_params(priv, 0, extack);
5303 }
5304
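     /* Report matchall (police) statistics as the delta in the VF vport RX
      * counters since the previous query.
      */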
5305 void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv,
5306                              struct tc_cls_matchall_offload *ma)
5307 {
5308         struct mlx5e_rep_priv *rpriv = priv->ppriv;
5309         struct rtnl_link_stats64 cur_stats;
5310         u64 dbytes;
5311         u64 dpkts;
5312
5313         cur_stats = priv->stats.vf_vport;
5314         dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets;
5315         dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes;
5316         rpriv->prev_vf_vport_stats = cur_stats;
5317         flow_stats_update(&ma->stats, dbytes, dpkts, 0, jiffies,
5318                           FLOW_ACTION_HW_STATS_DELAYED);
5319 }
5320
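     /* Called when a netdev that shares the same hardware is going away: take a
      * reference on every hairpin entry, wait for its setup to finish and, if
      * the entry is paired with the departing vhca, mark the pair as peer_gone
      * before dropping the reference.
      */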
5321 static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
5322                                               struct mlx5e_priv *peer_priv)
5323 {
5324         struct mlx5_core_dev *peer_mdev = peer_priv->mdev;
5325         struct mlx5e_hairpin_entry *hpe, *tmp;
5326         LIST_HEAD(init_wait_list);
5327         u16 peer_vhca_id;
5328         int bkt;
5329
5330         if (!same_hw_devs(priv, peer_priv))
5331                 return;
5332
5333         peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
5334
5335         mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
5336         hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist)
5337                 if (refcount_inc_not_zero(&hpe->refcnt))
5338                         list_add(&hpe->dead_peer_wait_list, &init_wait_list);
5339         mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
5340
5341         list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) {
5342                 wait_for_completion(&hpe->res_ready);
5343                 if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id)
5344                         hpe->hp->pair->peer_gone = true;
5345
5346                 mlx5e_hairpin_put(priv, hpe);
5347         }
5348 }
5349
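     /* Netdevice notifier: on NETDEV_UNREGISTER of another mlx5e netdev,
      * invalidate hairpin entries that point at the departing device (see
      * mlx5e_tc_hairpin_update_dead_peer() above).
      */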
5350 static int mlx5e_tc_netdev_event(struct notifier_block *this,
5351                                  unsigned long event, void *ptr)
5352 {
5353         struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
5354         struct mlx5e_flow_steering *fs;
5355         struct mlx5e_priv *peer_priv;
5356         struct mlx5e_tc_table *tc;
5357         struct mlx5e_priv *priv;
5358
5359         if (ndev->netdev_ops != &mlx5e_netdev_ops ||
5360             event != NETDEV_UNREGISTER ||
5361             ndev->reg_state == NETREG_REGISTERED)
5362                 return NOTIFY_DONE;
5363
5364         tc = container_of(this, struct mlx5e_tc_table, netdevice_nb);
5365         fs = container_of(tc, struct mlx5e_flow_steering, tc);
5366         priv = container_of(fs, struct mlx5e_priv, fs);
5367         peer_priv = netdev_priv(ndev);
5368         if (priv == peer_priv ||
5369             !(priv->netdev->features & NETIF_F_HW_TC))
5370                 return NOTIFY_DONE;
5371
5372         mlx5e_tc_hairpin_update_dead_peer(priv, peer_priv);
5373
5374         return NOTIFY_DONE;
5375 }
5376
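     /* Size the NIC TC flow table: cap the per-group size by the number of flow
      * counters the device exposes, and cap the whole table by the maximum flow
      * table size reported in the NIC RX flow-table capabilities.
      */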
5377 static int mlx5e_tc_nic_get_ft_size(struct mlx5_core_dev *dev)
5378 {
5379         int tc_grp_size, tc_tbl_size;
5380         u32 max_flow_counter;
5381
5382         max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
5383                             MLX5_CAP_GEN(dev, max_flow_counter_15_0);
5384
5385         tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE);
5386
5387         tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS,
5388                             BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size)));
5389
5390         return tc_tbl_size;
5391 }
5392
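     /* Initialize NIC-mode TC offload: mod-header table, hairpin table, flow
      * rhashtable, the chains infrastructure (with chains/prios and restore
      * tags when ignore_flow_level is supported), connection tracking, and the
      * netdevice notifier used for hairpin cleanup.
      */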
5393 int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
5394 {
5395         struct mlx5e_tc_table *tc = &priv->fs.tc;
5396         struct mlx5_core_dev *dev = priv->mdev;
5397         struct mlx5_chains_attr attr = {};
5398         int err;
5399
5400         mlx5e_mod_hdr_tbl_init(&tc->mod_hdr);
5401         mutex_init(&tc->t_lock);
5402         mutex_init(&tc->hairpin_tbl_lock);
5403         hash_init(tc->hairpin_tbl);
5404
5405         err = rhashtable_init(&tc->ht, &tc_ht_params);
5406         if (err)
5407                 return err;
5408
5409         lockdep_set_class(&tc->ht.mutex, &tc_ht_lock_key);
5410
5411         if (MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
5412                 attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED |
5413                         MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
5414                 attr.max_restore_tag = MLX5E_TC_TABLE_CHAIN_TAG_MASK;
5415         }
5416         attr.ns = MLX5_FLOW_NAMESPACE_KERNEL;
5417         attr.max_ft_sz = mlx5e_tc_nic_get_ft_size(dev);
5418         attr.max_grp_num = MLX5E_TC_TABLE_NUM_GROUPS;
5419         attr.default_ft = priv->fs.vlan.ft.t;
5420
5421         tc->chains = mlx5_chains_create(dev, &attr);
5422         if (IS_ERR(tc->chains)) {
5423                 err = PTR_ERR(tc->chains);
5424                 goto err_chains;
5425         }
5426
5427         tc->ct = mlx5_tc_ct_init(priv, tc->chains, &priv->fs.tc.mod_hdr,
5428                                  MLX5_FLOW_NAMESPACE_KERNEL);
5429         if (IS_ERR(tc->ct)) {
5430                 err = PTR_ERR(tc->ct);
5431                 goto err_ct;
5432         }
5433
5434         tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
5435         err = register_netdevice_notifier_dev_net(priv->netdev,
5436                                                   &tc->netdevice_nb,
5437                                                   &tc->netdevice_nn);
5438         if (err) {
5439                 tc->netdevice_nb.notifier_call = NULL;
5440                 mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n");
5441                 goto err_reg;
5442         }
5443
5444         return 0;
5445
5446 err_reg:
5447         mlx5_tc_ct_clean(tc->ct);
5448 err_ct:
5449         mlx5_chains_destroy(tc->chains);
5450 err_chains:
5451         rhashtable_destroy(&tc->ht);
5452         return err;
5453 }
5454
5455 static void _mlx5e_tc_del_flow(void *ptr, void *arg)
5456 {
5457         struct mlx5e_tc_flow *flow = ptr;
5458         struct mlx5e_priv *priv = flow->priv;
5459
5460         mlx5e_tc_del_flow(priv, flow);
5461         kfree(flow);
5462 }
5463
5464 void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
5465 {
5466         struct mlx5e_tc_table *tc = &priv->fs.tc;
5467
5468         if (tc->netdevice_nb.notifier_call)
5469                 unregister_netdevice_notifier_dev_net(priv->netdev,
5470                                                       &tc->netdevice_nb,
5471                                                       &tc->netdevice_nn);
5472
5473         mlx5e_mod_hdr_tbl_destroy(&tc->mod_hdr);
5474         mutex_destroy(&tc->hairpin_tbl_lock);
5475
5476         rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL);
5477
5478         if (!IS_ERR_OR_NULL(tc->t)) {
5479                 mlx5_chains_put_table(tc->chains, 0, 1, MLX5E_TC_FT_LEVEL);
5480                 tc->t = NULL;
5481         }
5482         mutex_destroy(&tc->t_lock);
5483
5484         mlx5_tc_ct_clean(tc->ct);
5485         mlx5_chains_destroy(tc->chains);
5486 }
5487
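     /* Initialize eswitch-mode TC offload for the uplink representor:
      * connection tracking in the FDB namespace, the mapping contexts for
      * tunnel match keys and their encap options, and the flow rhashtable.
      */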
5488 int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
5489 {
5490         const size_t sz_enc_opts = sizeof(struct tunnel_match_enc_opts);
5491         struct mlx5_rep_uplink_priv *uplink_priv;
5492         struct mlx5e_rep_priv *rpriv;
5493         struct mapping_ctx *mapping;
5494         struct mlx5_eswitch *esw;
5495         struct mlx5e_priv *priv;
5496         int err = 0;
5497
5498         uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht);
5499         rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
5500         priv = netdev_priv(rpriv->netdev);
5501         esw = priv->mdev->priv.eswitch;
5502
5503         uplink_priv->ct_priv = mlx5_tc_ct_init(netdev_priv(priv->netdev),
5504                                                esw_chains(esw),
5505                                                &esw->offloads.mod_hdr,
5506                                                MLX5_FLOW_NAMESPACE_FDB);
5507         if (IS_ERR(uplink_priv->ct_priv)) {
                     err = PTR_ERR(uplink_priv->ct_priv);
5508                 goto err_ct;
             }
5509
5510         mapping = mapping_create(sizeof(struct tunnel_match_key),
5511                                  TUNNEL_INFO_BITS_MASK, true);
5512         if (IS_ERR(mapping)) {
5513                 err = PTR_ERR(mapping);
5514                 goto err_tun_mapping;
5515         }
5516         uplink_priv->tunnel_mapping = mapping;
5517
5518         mapping = mapping_create(sz_enc_opts, ENC_OPTS_BITS_MASK, true);
5519         if (IS_ERR(mapping)) {
5520                 err = PTR_ERR(mapping);
5521                 goto err_enc_opts_mapping;
5522         }
5523         uplink_priv->tunnel_enc_opts_mapping = mapping;
5524
5525         err = rhashtable_init(tc_ht, &tc_ht_params);
5526         if (err)
5527                 goto err_ht_init;
5528
5529         lockdep_set_class(&tc_ht->mutex, &tc_ht_lock_key);
5530
5531         return err;
5532
5533 err_ht_init:
5534         mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
5535 err_enc_opts_mapping:
5536         mapping_destroy(uplink_priv->tunnel_mapping);
5537 err_tun_mapping:
5538         mlx5_tc_ct_clean(uplink_priv->ct_priv);
5539 err_ct:
5540         netdev_warn(priv->netdev,
5541                     "Failed to initialize tc (eswitch), err: %d\n", err);
5542         return err;
5543 }
5544
5545 void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
5546 {
5547         struct mlx5_rep_uplink_priv *uplink_priv;
5548
5549         rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
5550
5551         uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht);
5552
5553         mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
5554         mapping_destroy(uplink_priv->tunnel_mapping);
5555
5556         mlx5_tc_ct_clean(uplink_priv->ct_priv);
5557 }
5558
5559 int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
5560 {
5561         struct rhashtable *tc_ht = get_tc_ht(priv, flags);
5562
5563         return atomic_read(&tc_ht->nelems);
5564 }
5565
5566 void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
5567 {
5568         struct mlx5e_tc_flow *flow, *tmp;
5569
5570         list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer)
5571                 __mlx5e_tc_del_fdb_peer_flow(flow);
5572 }
5573
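     /* Work item that retries offloading of flows on the unready list; flows
      * that are successfully added to the FDB are removed from the list.
      */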
5574 void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
5575 {
5576         struct mlx5_rep_uplink_priv *rpriv =
5577                 container_of(work, struct mlx5_rep_uplink_priv,
5578                              reoffload_flows_work);
5579         struct mlx5e_tc_flow *flow, *tmp;
5580
5581         mutex_lock(&rpriv->unready_flows_lock);
5582         list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) {
5583                 if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL))
5584                         unready_flow_del(flow);
5585         }
5586         mutex_unlock(&rpriv->unready_flows_lock);
5587 }
5588
5589 static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
5590                                      struct flow_cls_offload *cls_flower,
5591                                      unsigned long flags)
5592 {
5593         switch (cls_flower->command) {
5594         case FLOW_CLS_REPLACE:
5595                 return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
5596                                               flags);
5597         case FLOW_CLS_DESTROY:
5598                 return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
5599                                            flags);
5600         case FLOW_CLS_STATS:
5601                 return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
5602                                           flags);
5603         default:
5604                 return -EOPNOTSUPP;
5605         }
5606 }
5607
5608 int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
5609                             void *cb_priv)
5610 {
5611         unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(NIC_OFFLOAD);
5612         struct mlx5e_priv *priv = cb_priv;
5613
5614         switch (type) {
5615         case TC_SETUP_CLSFLOWER:
5616                 return mlx5e_setup_tc_cls_flower(priv, type_data, flags);
5617         default:
5618                 return -EOPNOTSUPP;
5619         }
5620 }
5621
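     /* On receive (with CONFIG_NET_TC_SKB_EXT), recover TC state from the CQE
      * flow-table metadata: look up the chain from the chain tag, record it in
      * the tc skb extension, and restore the conntrack zone from the
      * zone-restore bits, returning false on failure.
      */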
5622 bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe,
5623                          struct sk_buff *skb)
5624 {
5625 #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
5626         u32 chain = 0, chain_tag, reg_b, zone_restore_id;
5627         struct mlx5e_priv *priv = netdev_priv(skb->dev);
5628         struct mlx5e_tc_table *tc = &priv->fs.tc;
5629         struct tc_skb_ext *tc_skb_ext;
5630         int err;
5631
5632         reg_b = be32_to_cpu(cqe->ft_metadata);
5633
5634         chain_tag = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK;
5635
5636         err = mlx5_get_chain_for_tag(nic_chains(priv), chain_tag, &chain);
5637         if (err) {
5638                 netdev_dbg(priv->netdev,
5639                            "Couldn't find chain for chain tag: %d, err: %d\n",
5640                            chain_tag, err);
5641                 return false;
5642         }
5643
5644         if (chain) {
5645                 tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT);
5646                 if (WARN_ON(!tc_skb_ext))
5647                         return false;
5648
5649                 tc_skb_ext->chain = chain;
5650
5651                 zone_restore_id = (reg_b >> REG_MAPPING_SHIFT(NIC_ZONE_RESTORE_TO_REG)) &
5652                         ESW_ZONE_ID_MASK;
5653
5654                 if (!mlx5e_tc_ct_restore_flow(tc->ct, skb,
5655                                               zone_restore_id))
5656                         return false;
5657         }
5658 #endif /* CONFIG_NET_TC_SKB_EXT */
5659
5660         return true;
5661 }