net/mlx5e: Don't use err uninitialized in mlx5e_attach_decap
[linux-2.6-microblaze.git] / drivers / net / ethernet / mellanox / mlx5 / core / en_tc.c
1 /*
2  * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32
33 #include <net/flow_dissector.h>
34 #include <net/flow_offload.h>
35 #include <net/sch_generic.h>
36 #include <net/pkt_cls.h>
37 #include <net/tc_act/tc_gact.h>
38 #include <net/tc_act/tc_skbedit.h>
39 #include <linux/mlx5/fs.h>
40 #include <linux/mlx5/device.h>
41 #include <linux/rhashtable.h>
42 #include <linux/refcount.h>
43 #include <linux/completion.h>
44 #include <net/tc_act/tc_mirred.h>
45 #include <net/tc_act/tc_vlan.h>
46 #include <net/tc_act/tc_tunnel_key.h>
47 #include <net/tc_act/tc_pedit.h>
48 #include <net/tc_act/tc_csum.h>
49 #include <net/tc_act/tc_mpls.h>
50 #include <net/arp.h>
51 #include <net/ipv6_stubs.h>
52 #include <net/bareudp.h>
53 #include <net/bonding.h>
54 #include "en.h"
55 #include "en_rep.h"
56 #include "en/rep/tc.h"
57 #include "en/rep/neigh.h"
58 #include "en_tc.h"
59 #include "eswitch.h"
60 #include "esw/chains.h"
61 #include "fs_core.h"
62 #include "en/port.h"
63 #include "en/tc_tun.h"
64 #include "en/mapping.h"
65 #include "en/tc_ct.h"
66 #include "lib/devcom.h"
67 #include "lib/geneve.h"
68 #include "diag/en_tc_tracepoint.h"
69
70 #define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)
71
72 struct mlx5_nic_flow_attr {
73         u32 action;
74         u32 flow_tag;
75         struct mlx5_modify_hdr *modify_hdr;
76         u32 hairpin_tirn;
77         u8 match_level;
78         struct mlx5_flow_table  *hairpin_ft;
79         struct mlx5_fc          *counter;
80 };
81
82 #define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1)
83
84 enum {
85         MLX5E_TC_FLOW_FLAG_INGRESS      = MLX5E_TC_FLAG_INGRESS_BIT,
86         MLX5E_TC_FLOW_FLAG_EGRESS       = MLX5E_TC_FLAG_EGRESS_BIT,
87         MLX5E_TC_FLOW_FLAG_ESWITCH      = MLX5E_TC_FLAG_ESW_OFFLOAD_BIT,
88         MLX5E_TC_FLOW_FLAG_FT           = MLX5E_TC_FLAG_FT_OFFLOAD_BIT,
89         MLX5E_TC_FLOW_FLAG_NIC          = MLX5E_TC_FLAG_NIC_OFFLOAD_BIT,
90         MLX5E_TC_FLOW_FLAG_OFFLOADED    = MLX5E_TC_FLOW_BASE,
91         MLX5E_TC_FLOW_FLAG_HAIRPIN      = MLX5E_TC_FLOW_BASE + 1,
92         MLX5E_TC_FLOW_FLAG_HAIRPIN_RSS  = MLX5E_TC_FLOW_BASE + 2,
93         MLX5E_TC_FLOW_FLAG_SLOW         = MLX5E_TC_FLOW_BASE + 3,
94         MLX5E_TC_FLOW_FLAG_DUP          = MLX5E_TC_FLOW_BASE + 4,
95         MLX5E_TC_FLOW_FLAG_NOT_READY    = MLX5E_TC_FLOW_BASE + 5,
96         MLX5E_TC_FLOW_FLAG_DELETED      = MLX5E_TC_FLOW_BASE + 6,
97         MLX5E_TC_FLOW_FLAG_CT           = MLX5E_TC_FLOW_BASE + 7,
98         MLX5E_TC_FLOW_FLAG_L3_TO_L2_DECAP = MLX5E_TC_FLOW_BASE + 8,
99 };
100
101 #define MLX5E_TC_MAX_SPLITS 1
102
103 /* Helper struct for accessing a struct containing list_head array.
104  * Containing struct
105  *   |- Helper array
106  *      [0] Helper item 0
107  *          |- list_head item 0
108  *          |- index (0)
109  *      [1] Helper item 1
110  *          |- list_head item 1
111  *          |- index (1)
112  * To access the containing struct from one of the list_head items:
113  * 1. Get the helper item from the list_head item using
114  *    helper item =
115  *        container_of(list_head item, helper struct type, list_head field)
116  * 2. Get the contining struct from the helper item and its index in the array:
117  *    containing struct =
118  *        container_of(helper item, containing struct type, helper field[index])
119  */
120 struct encap_flow_item {
121         struct mlx5e_encap_entry *e; /* attached encap instance */
122         struct list_head list;
123         int index;
124 };
125
126 struct mlx5e_tc_flow {
127         struct rhash_head       node;
128         struct mlx5e_priv       *priv;
129         u64                     cookie;
130         unsigned long           flags;
131         struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1];
132
133         /* flows sharing the same reformat object - currently mpls decap */
134         struct list_head l3_to_l2_reformat;
135         struct mlx5e_decap_entry *decap_reformat;
136
137         /* Flow can be associated with multiple encap IDs.
138          * The number of encaps is bounded by the number of supported
139          * destinations.
140          */
141         struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
142         struct mlx5e_tc_flow    *peer_flow;
143         struct mlx5e_mod_hdr_entry *mh; /* attached mod header instance */
144         struct list_head        mod_hdr; /* flows sharing the same mod hdr ID */
145         struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */
146         struct list_head        hairpin; /* flows sharing the same hairpin */
147         struct list_head        peer;    /* flows with peer flow */
148         struct list_head        unready; /* flows not ready to be offloaded (e.g due to missing route) */
149         struct net_device       *orig_dev; /* netdev adding flow first */
150         int                     tmp_efi_index;
151         struct list_head        tmp_list; /* temporary flow list used by neigh update */
152         refcount_t              refcnt;
153         struct rcu_head         rcu_head;
154         struct completion       init_done;
155         int tunnel_id; /* the mapped tunnel id of this flow */
156
157         union {
158                 struct mlx5_esw_flow_attr esw_attr[0];
159                 struct mlx5_nic_flow_attr nic_attr[0];
160         };
161 };
162
163 struct mlx5e_tc_flow_parse_attr {
164         const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS];
165         struct net_device *filter_dev;
166         struct mlx5_flow_spec spec;
167         struct mlx5e_tc_mod_hdr_acts mod_hdr_acts;
168         int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS];
169         struct ethhdr eth;
170 };
171
172 #define MLX5E_TC_TABLE_NUM_GROUPS 4
173 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(16)
174
175 struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
176         [CHAIN_TO_REG] = {
177                 .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
178                 .moffset = 0,
179                 .mlen = 2,
180         },
181         [TUNNEL_TO_REG] = {
182                 .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,
183                 .moffset = 3,
184                 .mlen = 1,
185                 .soffset = MLX5_BYTE_OFF(fte_match_param,
186                                          misc_parameters_2.metadata_reg_c_1),
187         },
188         [ZONE_TO_REG] = zone_to_reg_ct,
189         [CTSTATE_TO_REG] = ctstate_to_reg_ct,
190         [MARK_TO_REG] = mark_to_reg_ct,
191         [LABELS_TO_REG] = labels_to_reg_ct,
192         [FTEID_TO_REG] = fteid_to_reg_ct,
193         [TUPLEID_TO_REG] = tupleid_to_reg_ct,
194 };
195
196 static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow);
197
198 void
199 mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec,
200                             enum mlx5e_tc_attr_to_reg type,
201                             u32 data,
202                             u32 mask)
203 {
204         int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
205         int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
206         void *headers_c = spec->match_criteria;
207         void *headers_v = spec->match_value;
208         void *fmask, *fval;
209
210         fmask = headers_c + soffset;
211         fval = headers_v + soffset;
212
213         mask = cpu_to_be32(mask) >> (32 - (match_len * 8));
214         data = cpu_to_be32(data) >> (32 - (match_len * 8));
215
216         memcpy(fmask, &mask, match_len);
217         memcpy(fval, &data, match_len);
218
219         spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
220 }
221
222 int
223 mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
224                           struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
225                           enum mlx5e_tc_attr_to_reg type,
226                           u32 data)
227 {
228         int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
229         int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
230         int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
231         char *modact;
232         int err;
233
234         err = alloc_mod_hdr_actions(mdev, MLX5_FLOW_NAMESPACE_FDB,
235                                     mod_hdr_acts);
236         if (err)
237                 return err;
238
239         modact = mod_hdr_acts->actions +
240                  (mod_hdr_acts->num_actions * MLX5_MH_ACT_SZ);
241
242         /* Firmware has 5bit length field and 0 means 32bits */
243         if (mlen == 4)
244                 mlen = 0;
245
246         MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
247         MLX5_SET(set_action_in, modact, field, mfield);
248         MLX5_SET(set_action_in, modact, offset, moffset * 8);
249         MLX5_SET(set_action_in, modact, length, mlen * 8);
250         MLX5_SET(set_action_in, modact, data, data);
251         mod_hdr_acts->num_actions++;
252
253         return 0;
254 }
255
256 struct mlx5e_hairpin {
257         struct mlx5_hairpin *pair;
258
259         struct mlx5_core_dev *func_mdev;
260         struct mlx5e_priv *func_priv;
261         u32 tdn;
262         u32 tirn;
263
264         int num_channels;
265         struct mlx5e_rqt indir_rqt;
266         u32 indir_tirn[MLX5E_NUM_INDIR_TIRS];
267         struct mlx5e_ttc_table ttc;
268 };
269
270 struct mlx5e_hairpin_entry {
271         /* a node of a hash table which keeps all the  hairpin entries */
272         struct hlist_node hairpin_hlist;
273
274         /* protects flows list */
275         spinlock_t flows_lock;
276         /* flows sharing the same hairpin */
277         struct list_head flows;
278         /* hpe's that were not fully initialized when dead peer update event
279          * function traversed them.
280          */
281         struct list_head dead_peer_wait_list;
282
283         u16 peer_vhca_id;
284         u8 prio;
285         struct mlx5e_hairpin *hp;
286         refcount_t refcnt;
287         struct completion res_ready;
288 };
289
290 struct mod_hdr_key {
291         int num_actions;
292         void *actions;
293 };
294
295 struct mlx5e_mod_hdr_entry {
296         /* a node of a hash table which keeps all the mod_hdr entries */
297         struct hlist_node mod_hdr_hlist;
298
299         /* protects flows list */
300         spinlock_t flows_lock;
301         /* flows sharing the same mod_hdr entry */
302         struct list_head flows;
303
304         struct mod_hdr_key key;
305
306         struct mlx5_modify_hdr *modify_hdr;
307
308         refcount_t refcnt;
309         struct completion res_ready;
310         int compl_result;
311 };
312
313 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
314                               struct mlx5e_tc_flow *flow);
315
316 static struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
317 {
318         if (!flow || !refcount_inc_not_zero(&flow->refcnt))
319                 return ERR_PTR(-EINVAL);
320         return flow;
321 }
322
323 static void mlx5e_flow_put(struct mlx5e_priv *priv,
324                            struct mlx5e_tc_flow *flow)
325 {
326         if (refcount_dec_and_test(&flow->refcnt)) {
327                 mlx5e_tc_del_flow(priv, flow);
328                 kfree_rcu(flow, rcu_head);
329         }
330 }
331
332 static void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag)
333 {
334         /* Complete all memory stores before setting bit. */
335         smp_mb__before_atomic();
336         set_bit(flag, &flow->flags);
337 }
338
339 #define flow_flag_set(flow, flag) __flow_flag_set(flow, MLX5E_TC_FLOW_FLAG_##flag)
340
341 static bool __flow_flag_test_and_set(struct mlx5e_tc_flow *flow,
342                                      unsigned long flag)
343 {
344         /* test_and_set_bit() provides all necessary barriers */
345         return test_and_set_bit(flag, &flow->flags);
346 }
347
348 #define flow_flag_test_and_set(flow, flag)                      \
349         __flow_flag_test_and_set(flow,                          \
350                                  MLX5E_TC_FLOW_FLAG_##flag)
351
352 static void __flow_flag_clear(struct mlx5e_tc_flow *flow, unsigned long flag)
353 {
354         /* Complete all memory stores before clearing bit. */
355         smp_mb__before_atomic();
356         clear_bit(flag, &flow->flags);
357 }
358
359 #define flow_flag_clear(flow, flag) __flow_flag_clear(flow, \
360                                                       MLX5E_TC_FLOW_FLAG_##flag)
361
362 static bool __flow_flag_test(struct mlx5e_tc_flow *flow, unsigned long flag)
363 {
364         bool ret = test_bit(flag, &flow->flags);
365
366         /* Read fields of flow structure only after checking flags. */
367         smp_mb__after_atomic();
368         return ret;
369 }
370
371 #define flow_flag_test(flow, flag) __flow_flag_test(flow, \
372                                                     MLX5E_TC_FLOW_FLAG_##flag)
373
374 static bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
375 {
376         return flow_flag_test(flow, ESWITCH);
377 }
378
379 static bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow)
380 {
381         return flow_flag_test(flow, FT);
382 }
383
384 static bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
385 {
386         return flow_flag_test(flow, OFFLOADED);
387 }
388
389 static inline u32 hash_mod_hdr_info(struct mod_hdr_key *key)
390 {
391         return jhash(key->actions,
392                      key->num_actions * MLX5_MH_ACT_SZ, 0);
393 }
394
395 static inline int cmp_mod_hdr_info(struct mod_hdr_key *a,
396                                    struct mod_hdr_key *b)
397 {
398         if (a->num_actions != b->num_actions)
399                 return 1;
400
401         return memcmp(a->actions, b->actions, a->num_actions * MLX5_MH_ACT_SZ);
402 }
403
404 static struct mod_hdr_tbl *
405 get_mod_hdr_table(struct mlx5e_priv *priv, int namespace)
406 {
407         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
408
409         return namespace == MLX5_FLOW_NAMESPACE_FDB ? &esw->offloads.mod_hdr :
410                 &priv->fs.tc.mod_hdr;
411 }
412
413 static struct mlx5e_mod_hdr_entry *
414 mlx5e_mod_hdr_get(struct mod_hdr_tbl *tbl, struct mod_hdr_key *key, u32 hash_key)
415 {
416         struct mlx5e_mod_hdr_entry *mh, *found = NULL;
417
418         hash_for_each_possible(tbl->hlist, mh, mod_hdr_hlist, hash_key) {
419                 if (!cmp_mod_hdr_info(&mh->key, key)) {
420                         refcount_inc(&mh->refcnt);
421                         found = mh;
422                         break;
423                 }
424         }
425
426         return found;
427 }
428
429 static void mlx5e_mod_hdr_put(struct mlx5e_priv *priv,
430                               struct mlx5e_mod_hdr_entry *mh,
431                               int namespace)
432 {
433         struct mod_hdr_tbl *tbl = get_mod_hdr_table(priv, namespace);
434
435         if (!refcount_dec_and_mutex_lock(&mh->refcnt, &tbl->lock))
436                 return;
437         hash_del(&mh->mod_hdr_hlist);
438         mutex_unlock(&tbl->lock);
439
440         WARN_ON(!list_empty(&mh->flows));
441         if (mh->compl_result > 0)
442                 mlx5_modify_header_dealloc(priv->mdev, mh->modify_hdr);
443
444         kfree(mh);
445 }
446
447 static int get_flow_name_space(struct mlx5e_tc_flow *flow)
448 {
449         return mlx5e_is_eswitch_flow(flow) ?
450                 MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL;
451 }
452 static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
453                                 struct mlx5e_tc_flow *flow,
454                                 struct mlx5e_tc_flow_parse_attr *parse_attr)
455 {
456         int num_actions, actions_size, namespace, err;
457         struct mlx5e_mod_hdr_entry *mh;
458         struct mod_hdr_tbl *tbl;
459         struct mod_hdr_key key;
460         u32 hash_key;
461
462         num_actions  = parse_attr->mod_hdr_acts.num_actions;
463         actions_size = MLX5_MH_ACT_SZ * num_actions;
464
465         key.actions = parse_attr->mod_hdr_acts.actions;
466         key.num_actions = num_actions;
467
468         hash_key = hash_mod_hdr_info(&key);
469
470         namespace = get_flow_name_space(flow);
471         tbl = get_mod_hdr_table(priv, namespace);
472
473         mutex_lock(&tbl->lock);
474         mh = mlx5e_mod_hdr_get(tbl, &key, hash_key);
475         if (mh) {
476                 mutex_unlock(&tbl->lock);
477                 wait_for_completion(&mh->res_ready);
478
479                 if (mh->compl_result < 0) {
480                         err = -EREMOTEIO;
481                         goto attach_header_err;
482                 }
483                 goto attach_flow;
484         }
485
486         mh = kzalloc(sizeof(*mh) + actions_size, GFP_KERNEL);
487         if (!mh) {
488                 mutex_unlock(&tbl->lock);
489                 return -ENOMEM;
490         }
491
492         mh->key.actions = (void *)mh + sizeof(*mh);
493         memcpy(mh->key.actions, key.actions, actions_size);
494         mh->key.num_actions = num_actions;
495         spin_lock_init(&mh->flows_lock);
496         INIT_LIST_HEAD(&mh->flows);
497         refcount_set(&mh->refcnt, 1);
498         init_completion(&mh->res_ready);
499
500         hash_add(tbl->hlist, &mh->mod_hdr_hlist, hash_key);
501         mutex_unlock(&tbl->lock);
502
503         mh->modify_hdr = mlx5_modify_header_alloc(priv->mdev, namespace,
504                                                   mh->key.num_actions,
505                                                   mh->key.actions);
506         if (IS_ERR(mh->modify_hdr)) {
507                 err = PTR_ERR(mh->modify_hdr);
508                 mh->compl_result = err;
509                 goto alloc_header_err;
510         }
511         mh->compl_result = 1;
512         complete_all(&mh->res_ready);
513
514 attach_flow:
515         flow->mh = mh;
516         spin_lock(&mh->flows_lock);
517         list_add(&flow->mod_hdr, &mh->flows);
518         spin_unlock(&mh->flows_lock);
519         if (mlx5e_is_eswitch_flow(flow))
520                 flow->esw_attr->modify_hdr = mh->modify_hdr;
521         else
522                 flow->nic_attr->modify_hdr = mh->modify_hdr;
523
524         return 0;
525
526 alloc_header_err:
527         complete_all(&mh->res_ready);
528 attach_header_err:
529         mlx5e_mod_hdr_put(priv, mh, namespace);
530         return err;
531 }
532
533 static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv,
534                                  struct mlx5e_tc_flow *flow)
535 {
536         /* flow wasn't fully initialized */
537         if (!flow->mh)
538                 return;
539
540         spin_lock(&flow->mh->flows_lock);
541         list_del(&flow->mod_hdr);
542         spin_unlock(&flow->mh->flows_lock);
543
544         mlx5e_mod_hdr_put(priv, flow->mh, get_flow_name_space(flow));
545         flow->mh = NULL;
546 }
547
548 static
549 struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex)
550 {
551         struct net_device *netdev;
552         struct mlx5e_priv *priv;
553
554         netdev = __dev_get_by_index(net, ifindex);
555         priv = netdev_priv(netdev);
556         return priv->mdev;
557 }
558
559 static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp)
560 {
561         u32 in[MLX5_ST_SZ_DW(create_tir_in)] = {};
562         void *tirc;
563         int err;
564
565         err = mlx5_core_alloc_transport_domain(hp->func_mdev, &hp->tdn);
566         if (err)
567                 goto alloc_tdn_err;
568
569         tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
570
571         MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
572         MLX5_SET(tirc, tirc, inline_rqn, hp->pair->rqn[0]);
573         MLX5_SET(tirc, tirc, transport_domain, hp->tdn);
574
575         err = mlx5_core_create_tir(hp->func_mdev, in, &hp->tirn);
576         if (err)
577                 goto create_tir_err;
578
579         return 0;
580
581 create_tir_err:
582         mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
583 alloc_tdn_err:
584         return err;
585 }
586
587 static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp)
588 {
589         mlx5_core_destroy_tir(hp->func_mdev, hp->tirn);
590         mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
591 }
592
593 static void mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc)
594 {
595         u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE], rqn;
596         struct mlx5e_priv *priv = hp->func_priv;
597         int i, ix, sz = MLX5E_INDIR_RQT_SIZE;
598
599         mlx5e_build_default_indir_rqt(indirection_rqt, sz,
600                                       hp->num_channels);
601
602         for (i = 0; i < sz; i++) {
603                 ix = i;
604                 if (priv->rss_params.hfunc == ETH_RSS_HASH_XOR)
605                         ix = mlx5e_bits_invert(i, ilog2(sz));
606                 ix = indirection_rqt[ix];
607                 rqn = hp->pair->rqn[ix];
608                 MLX5_SET(rqtc, rqtc, rq_num[i], rqn);
609         }
610 }
611
612 static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
613 {
614         int inlen, err, sz = MLX5E_INDIR_RQT_SIZE;
615         struct mlx5e_priv *priv = hp->func_priv;
616         struct mlx5_core_dev *mdev = priv->mdev;
617         void *rqtc;
618         u32 *in;
619
620         inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
621         in = kvzalloc(inlen, GFP_KERNEL);
622         if (!in)
623                 return -ENOMEM;
624
625         rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
626
627         MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
628         MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
629
630         mlx5e_hairpin_fill_rqt_rqns(hp, rqtc);
631
632         err = mlx5_core_create_rqt(mdev, in, inlen, &hp->indir_rqt.rqtn);
633         if (!err)
634                 hp->indir_rqt.enabled = true;
635
636         kvfree(in);
637         return err;
638 }
639
640 static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
641 {
642         struct mlx5e_priv *priv = hp->func_priv;
643         u32 in[MLX5_ST_SZ_DW(create_tir_in)];
644         int tt, i, err;
645         void *tirc;
646
647         for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
648                 struct mlx5e_tirc_config ttconfig = mlx5e_tirc_get_default_config(tt);
649
650                 memset(in, 0, MLX5_ST_SZ_BYTES(create_tir_in));
651                 tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
652
653                 MLX5_SET(tirc, tirc, transport_domain, hp->tdn);
654                 MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
655                 MLX5_SET(tirc, tirc, indirect_table, hp->indir_rqt.rqtn);
656                 mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, &ttconfig, tirc, false);
657
658                 err = mlx5_core_create_tir(hp->func_mdev, in,
659                                            &hp->indir_tirn[tt]);
660                 if (err) {
661                         mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err);
662                         goto err_destroy_tirs;
663                 }
664         }
665         return 0;
666
667 err_destroy_tirs:
668         for (i = 0; i < tt; i++)
669                 mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[i]);
670         return err;
671 }
672
673 static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp)
674 {
675         int tt;
676
677         for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
678                 mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[tt]);
679 }
680
681 static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp,
682                                          struct ttc_params *ttc_params)
683 {
684         struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
685         int tt;
686
687         memset(ttc_params, 0, sizeof(*ttc_params));
688
689         ttc_params->any_tt_tirn = hp->tirn;
690
691         for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
692                 ttc_params->indir_tirn[tt] = hp->indir_tirn[tt];
693
694         ft_attr->max_fte = MLX5E_TTC_TABLE_SIZE;
695         ft_attr->level = MLX5E_TC_TTC_FT_LEVEL;
696         ft_attr->prio = MLX5E_TC_PRIO;
697 }
698
699 static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp)
700 {
701         struct mlx5e_priv *priv = hp->func_priv;
702         struct ttc_params ttc_params;
703         int err;
704
705         err = mlx5e_hairpin_create_indirect_rqt(hp);
706         if (err)
707                 return err;
708
709         err = mlx5e_hairpin_create_indirect_tirs(hp);
710         if (err)
711                 goto err_create_indirect_tirs;
712
713         mlx5e_hairpin_set_ttc_params(hp, &ttc_params);
714         err = mlx5e_create_ttc_table(priv, &ttc_params, &hp->ttc);
715         if (err)
716                 goto err_create_ttc_table;
717
718         netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n",
719                    hp->num_channels, hp->ttc.ft.t->id);
720
721         return 0;
722
723 err_create_ttc_table:
724         mlx5e_hairpin_destroy_indirect_tirs(hp);
725 err_create_indirect_tirs:
726         mlx5e_destroy_rqt(priv, &hp->indir_rqt);
727
728         return err;
729 }
730
731 static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp)
732 {
733         struct mlx5e_priv *priv = hp->func_priv;
734
735         mlx5e_destroy_ttc_table(priv, &hp->ttc);
736         mlx5e_hairpin_destroy_indirect_tirs(hp);
737         mlx5e_destroy_rqt(priv, &hp->indir_rqt);
738 }
739
740 static struct mlx5e_hairpin *
741 mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params,
742                      int peer_ifindex)
743 {
744         struct mlx5_core_dev *func_mdev, *peer_mdev;
745         struct mlx5e_hairpin *hp;
746         struct mlx5_hairpin *pair;
747         int err;
748
749         hp = kzalloc(sizeof(*hp), GFP_KERNEL);
750         if (!hp)
751                 return ERR_PTR(-ENOMEM);
752
753         func_mdev = priv->mdev;
754         peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
755
756         pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params);
757         if (IS_ERR(pair)) {
758                 err = PTR_ERR(pair);
759                 goto create_pair_err;
760         }
761         hp->pair = pair;
762         hp->func_mdev = func_mdev;
763         hp->func_priv = priv;
764         hp->num_channels = params->num_channels;
765
766         err = mlx5e_hairpin_create_transport(hp);
767         if (err)
768                 goto create_transport_err;
769
770         if (hp->num_channels > 1) {
771                 err = mlx5e_hairpin_rss_init(hp);
772                 if (err)
773                         goto rss_init_err;
774         }
775
776         return hp;
777
778 rss_init_err:
779         mlx5e_hairpin_destroy_transport(hp);
780 create_transport_err:
781         mlx5_core_hairpin_destroy(hp->pair);
782 create_pair_err:
783         kfree(hp);
784         return ERR_PTR(err);
785 }
786
787 static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp)
788 {
789         if (hp->num_channels > 1)
790                 mlx5e_hairpin_rss_cleanup(hp);
791         mlx5e_hairpin_destroy_transport(hp);
792         mlx5_core_hairpin_destroy(hp->pair);
793         kvfree(hp);
794 }
795
796 static inline u32 hash_hairpin_info(u16 peer_vhca_id, u8 prio)
797 {
798         return (peer_vhca_id << 16 | prio);
799 }
800
801 static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv,
802                                                      u16 peer_vhca_id, u8 prio)
803 {
804         struct mlx5e_hairpin_entry *hpe;
805         u32 hash_key = hash_hairpin_info(peer_vhca_id, prio);
806
807         hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe,
808                                hairpin_hlist, hash_key) {
809                 if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) {
810                         refcount_inc(&hpe->refcnt);
811                         return hpe;
812                 }
813         }
814
815         return NULL;
816 }
817
818 static void mlx5e_hairpin_put(struct mlx5e_priv *priv,
819                               struct mlx5e_hairpin_entry *hpe)
820 {
821         /* no more hairpin flows for us, release the hairpin pair */
822         if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &priv->fs.tc.hairpin_tbl_lock))
823                 return;
824         hash_del(&hpe->hairpin_hlist);
825         mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
826
827         if (!IS_ERR_OR_NULL(hpe->hp)) {
828                 netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
829                            dev_name(hpe->hp->pair->peer_mdev->device));
830
831                 mlx5e_hairpin_destroy(hpe->hp);
832         }
833
834         WARN_ON(!list_empty(&hpe->flows));
835         kfree(hpe);
836 }
837
838 #define UNKNOWN_MATCH_PRIO 8
839
840 static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv,
841                                   struct mlx5_flow_spec *spec, u8 *match_prio,
842                                   struct netlink_ext_ack *extack)
843 {
844         void *headers_c, *headers_v;
845         u8 prio_val, prio_mask = 0;
846         bool vlan_present;
847
848 #ifdef CONFIG_MLX5_CORE_EN_DCB
849         if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) {
850                 NL_SET_ERR_MSG_MOD(extack,
851                                    "only PCP trust state supported for hairpin");
852                 return -EOPNOTSUPP;
853         }
854 #endif
855         headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
856         headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
857
858         vlan_present = MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag);
859         if (vlan_present) {
860                 prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
861                 prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
862         }
863
864         if (!vlan_present || !prio_mask) {
865                 prio_val = UNKNOWN_MATCH_PRIO;
866         } else if (prio_mask != 0x7) {
867                 NL_SET_ERR_MSG_MOD(extack,
868                                    "masked priority match not supported for hairpin");
869                 return -EOPNOTSUPP;
870         }
871
872         *match_prio = prio_val;
873         return 0;
874 }
875
876 static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
877                                   struct mlx5e_tc_flow *flow,
878                                   struct mlx5e_tc_flow_parse_attr *parse_attr,
879                                   struct netlink_ext_ack *extack)
880 {
881         int peer_ifindex = parse_attr->mirred_ifindex[0];
882         struct mlx5_hairpin_params params;
883         struct mlx5_core_dev *peer_mdev;
884         struct mlx5e_hairpin_entry *hpe;
885         struct mlx5e_hairpin *hp;
886         u64 link_speed64;
887         u32 link_speed;
888         u8 match_prio;
889         u16 peer_id;
890         int err;
891
892         peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
893         if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) {
894                 NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported");
895                 return -EOPNOTSUPP;
896         }
897
898         peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
899         err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio,
900                                      extack);
901         if (err)
902                 return err;
903
904         mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
905         hpe = mlx5e_hairpin_get(priv, peer_id, match_prio);
906         if (hpe) {
907                 mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
908                 wait_for_completion(&hpe->res_ready);
909
910                 if (IS_ERR(hpe->hp)) {
911                         err = -EREMOTEIO;
912                         goto out_err;
913                 }
914                 goto attach_flow;
915         }
916
917         hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
918         if (!hpe) {
919                 mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
920                 return -ENOMEM;
921         }
922
923         spin_lock_init(&hpe->flows_lock);
924         INIT_LIST_HEAD(&hpe->flows);
925         INIT_LIST_HEAD(&hpe->dead_peer_wait_list);
926         hpe->peer_vhca_id = peer_id;
927         hpe->prio = match_prio;
928         refcount_set(&hpe->refcnt, 1);
929         init_completion(&hpe->res_ready);
930
931         hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist,
932                  hash_hairpin_info(peer_id, match_prio));
933         mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
934
935         params.log_data_size = 15;
936         params.log_data_size = min_t(u8, params.log_data_size,
937                                      MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz));
938         params.log_data_size = max_t(u8, params.log_data_size,
939                                      MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz));
940
941         params.log_num_packets = params.log_data_size -
942                                  MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv->mdev);
943         params.log_num_packets = min_t(u8, params.log_num_packets,
944                                        MLX5_CAP_GEN(priv->mdev, log_max_hairpin_num_packets));
945
946         params.q_counter = priv->q_counter;
947         /* set hairpin pair per each 50Gbs share of the link */
948         mlx5e_port_max_linkspeed(priv->mdev, &link_speed);
949         link_speed = max_t(u32, link_speed, 50000);
950         link_speed64 = link_speed;
951         do_div(link_speed64, 50000);
952         params.num_channels = link_speed64;
953
954         hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
955         hpe->hp = hp;
956         complete_all(&hpe->res_ready);
957         if (IS_ERR(hp)) {
958                 err = PTR_ERR(hp);
959                 goto out_err;
960         }
961
962         netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n",
963                    hp->tirn, hp->pair->rqn[0],
964                    dev_name(hp->pair->peer_mdev->device),
965                    hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets);
966
967 attach_flow:
968         if (hpe->hp->num_channels > 1) {
969                 flow_flag_set(flow, HAIRPIN_RSS);
970                 flow->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t;
971         } else {
972                 flow->nic_attr->hairpin_tirn = hpe->hp->tirn;
973         }
974
975         flow->hpe = hpe;
976         spin_lock(&hpe->flows_lock);
977         list_add(&flow->hairpin, &hpe->flows);
978         spin_unlock(&hpe->flows_lock);
979
980         return 0;
981
982 out_err:
983         mlx5e_hairpin_put(priv, hpe);
984         return err;
985 }
986
987 static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
988                                    struct mlx5e_tc_flow *flow)
989 {
990         /* flow wasn't fully initialized */
991         if (!flow->hpe)
992                 return;
993
994         spin_lock(&flow->hpe->flows_lock);
995         list_del(&flow->hairpin);
996         spin_unlock(&flow->hpe->flows_lock);
997
998         mlx5e_hairpin_put(priv, flow->hpe);
999         flow->hpe = NULL;
1000 }
1001
1002 static int
1003 mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
1004                       struct mlx5e_tc_flow_parse_attr *parse_attr,
1005                       struct mlx5e_tc_flow *flow,
1006                       struct netlink_ext_ack *extack)
1007 {
1008         struct mlx5_flow_context *flow_context = &parse_attr->spec.flow_context;
1009         struct mlx5_nic_flow_attr *attr = flow->nic_attr;
1010         struct mlx5_core_dev *dev = priv->mdev;
1011         struct mlx5_flow_destination dest[2] = {};
1012         struct mlx5_flow_act flow_act = {
1013                 .action = attr->action,
1014                 .flags    = FLOW_ACT_NO_APPEND,
1015         };
1016         struct mlx5_fc *counter = NULL;
1017         int err, dest_ix = 0;
1018
1019         flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
1020         flow_context->flow_tag = attr->flow_tag;
1021
1022         if (flow_flag_test(flow, HAIRPIN)) {
1023                 err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
1024                 if (err)
1025                         return err;
1026
1027                 if (flow_flag_test(flow, HAIRPIN_RSS)) {
1028                         dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1029                         dest[dest_ix].ft = attr->hairpin_ft;
1030                 } else {
1031                         dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1032                         dest[dest_ix].tir_num = attr->hairpin_tirn;
1033                 }
1034                 dest_ix++;
1035         } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
1036                 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1037                 dest[dest_ix].ft = priv->fs.vlan.ft.t;
1038                 dest_ix++;
1039         }
1040
1041         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1042                 counter = mlx5_fc_create(dev, true);
1043                 if (IS_ERR(counter))
1044                         return PTR_ERR(counter);
1045
1046                 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1047                 dest[dest_ix].counter_id = mlx5_fc_id(counter);
1048                 dest_ix++;
1049                 attr->counter = counter;
1050         }
1051
1052         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
1053                 err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
1054                 flow_act.modify_hdr = attr->modify_hdr;
1055                 dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
1056                 if (err)
1057                         return err;
1058         }
1059
1060         mutex_lock(&priv->fs.tc.t_lock);
1061         if (IS_ERR_OR_NULL(priv->fs.tc.t)) {
1062                 struct mlx5_flow_table_attr ft_attr = {};
1063                 int tc_grp_size, tc_tbl_size, tc_num_grps;
1064                 u32 max_flow_counter;
1065
1066                 max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
1067                                     MLX5_CAP_GEN(dev, max_flow_counter_15_0);
1068
1069                 tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE);
1070
1071                 tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS,
1072                                     BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size)));
1073                 tc_num_grps = MLX5E_TC_TABLE_NUM_GROUPS;
1074
1075                 ft_attr.prio = MLX5E_TC_PRIO;
1076                 ft_attr.max_fte = tc_tbl_size;
1077                 ft_attr.level = MLX5E_TC_FT_LEVEL;
1078                 ft_attr.autogroup.max_num_groups = tc_num_grps;
1079                 priv->fs.tc.t =
1080                         mlx5_create_auto_grouped_flow_table(priv->fs.ns,
1081                                                             &ft_attr);
1082                 if (IS_ERR(priv->fs.tc.t)) {
1083                         mutex_unlock(&priv->fs.tc.t_lock);
1084                         NL_SET_ERR_MSG_MOD(extack,
1085                                            "Failed to create tc offload table");
1086                         netdev_err(priv->netdev,
1087                                    "Failed to create tc offload table\n");
1088                         return PTR_ERR(priv->fs.tc.t);
1089                 }
1090         }
1091
1092         if (attr->match_level != MLX5_MATCH_NONE)
1093                 parse_attr->spec.match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
1094
1095         flow->rule[0] = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec,
1096                                             &flow_act, dest, dest_ix);
1097         mutex_unlock(&priv->fs.tc.t_lock);
1098
1099         return PTR_ERR_OR_ZERO(flow->rule[0]);
1100 }
1101
1102 static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
1103                                   struct mlx5e_tc_flow *flow)
1104 {
1105         struct mlx5_nic_flow_attr *attr = flow->nic_attr;
1106         struct mlx5_fc *counter = NULL;
1107
1108         counter = attr->counter;
1109         if (!IS_ERR_OR_NULL(flow->rule[0]))
1110                 mlx5_del_flow_rules(flow->rule[0]);
1111         mlx5_fc_destroy(priv->mdev, counter);
1112
1113         mutex_lock(&priv->fs.tc.t_lock);
1114         if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) && priv->fs.tc.t) {
1115                 mlx5_destroy_flow_table(priv->fs.tc.t);
1116                 priv->fs.tc.t = NULL;
1117         }
1118         mutex_unlock(&priv->fs.tc.t_lock);
1119
1120         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
1121                 mlx5e_detach_mod_hdr(priv, flow);
1122
1123         if (flow_flag_test(flow, HAIRPIN))
1124                 mlx5e_hairpin_flow_del(priv, flow);
1125 }
1126
1127 static void mlx5e_detach_encap(struct mlx5e_priv *priv,
1128                                struct mlx5e_tc_flow *flow, int out_index);
1129
1130 static int mlx5e_attach_encap(struct mlx5e_priv *priv,
1131                               struct mlx5e_tc_flow *flow,
1132                               struct net_device *mirred_dev,
1133                               int out_index,
1134                               struct netlink_ext_ack *extack,
1135                               struct net_device **encap_dev,
1136                               bool *encap_valid);
1137 static int mlx5e_attach_decap(struct mlx5e_priv *priv,
1138                               struct mlx5e_tc_flow *flow,
1139                               struct netlink_ext_ack *extack);
1140 static void mlx5e_detach_decap(struct mlx5e_priv *priv,
1141                                struct mlx5e_tc_flow *flow);
1142
1143 static struct mlx5_flow_handle *
1144 mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
1145                            struct mlx5e_tc_flow *flow,
1146                            struct mlx5_flow_spec *spec,
1147                            struct mlx5_esw_flow_attr *attr)
1148 {
1149         struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
1150         struct mlx5_flow_handle *rule;
1151
1152         if (flow_flag_test(flow, CT)) {
1153                 mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
1154
1155                 return mlx5_tc_ct_flow_offload(flow->priv, flow, spec, attr,
1156                                                mod_hdr_acts);
1157         }
1158
1159         rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
1160         if (IS_ERR(rule))
1161                 return rule;
1162
1163         if (attr->split_count) {
1164                 flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
1165                 if (IS_ERR(flow->rule[1])) {
1166                         mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
1167                         return flow->rule[1];
1168                 }
1169         }
1170
1171         return rule;
1172 }
1173
1174 static void
1175 mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
1176                              struct mlx5e_tc_flow *flow,
1177                              struct mlx5_esw_flow_attr *attr)
1178 {
1179         flow_flag_clear(flow, OFFLOADED);
1180
1181         if (flow_flag_test(flow, CT)) {
1182                 mlx5_tc_ct_delete_flow(flow->priv, flow, attr);
1183                 return;
1184         }
1185
1186         if (attr->split_count)
1187                 mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);
1188
1189         mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
1190 }
1191
1192 static struct mlx5_flow_handle *
1193 mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
1194                               struct mlx5e_tc_flow *flow,
1195                               struct mlx5_flow_spec *spec)
1196 {
1197         struct mlx5_esw_flow_attr slow_attr;
1198         struct mlx5_flow_handle *rule;
1199
1200         memcpy(&slow_attr, flow->esw_attr, sizeof(slow_attr));
1201         slow_attr.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1202         slow_attr.split_count = 0;
1203         slow_attr.flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
1204
1205         rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, &slow_attr);
1206         if (!IS_ERR(rule))
1207                 flow_flag_set(flow, SLOW);
1208
1209         return rule;
1210 }
1211
1212 static void
1213 mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
1214                                   struct mlx5e_tc_flow *flow)
1215 {
1216         struct mlx5_esw_flow_attr slow_attr;
1217
1218         memcpy(&slow_attr, flow->esw_attr, sizeof(slow_attr));
1219         slow_attr.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1220         slow_attr.split_count = 0;
1221         slow_attr.flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
1222         mlx5e_tc_unoffload_fdb_rules(esw, flow, &slow_attr);
1223         flow_flag_clear(flow, SLOW);
1224 }
1225
1226 /* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
1227  * function.
1228  */
1229 static void unready_flow_add(struct mlx5e_tc_flow *flow,
1230                              struct list_head *unready_flows)
1231 {
1232         flow_flag_set(flow, NOT_READY);
1233         list_add_tail(&flow->unready, unready_flows);
1234 }
1235
1236 /* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
1237  * function.
1238  */
1239 static void unready_flow_del(struct mlx5e_tc_flow *flow)
1240 {
1241         list_del(&flow->unready);
1242         flow_flag_clear(flow, NOT_READY);
1243 }
1244
1245 static void add_unready_flow(struct mlx5e_tc_flow *flow)
1246 {
1247         struct mlx5_rep_uplink_priv *uplink_priv;
1248         struct mlx5e_rep_priv *rpriv;
1249         struct mlx5_eswitch *esw;
1250
1251         esw = flow->priv->mdev->priv.eswitch;
1252         rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1253         uplink_priv = &rpriv->uplink_priv;
1254
1255         mutex_lock(&uplink_priv->unready_flows_lock);
1256         unready_flow_add(flow, &uplink_priv->unready_flows);
1257         mutex_unlock(&uplink_priv->unready_flows_lock);
1258 }
1259
1260 static void remove_unready_flow(struct mlx5e_tc_flow *flow)
1261 {
1262         struct mlx5_rep_uplink_priv *uplink_priv;
1263         struct mlx5e_rep_priv *rpriv;
1264         struct mlx5_eswitch *esw;
1265
1266         esw = flow->priv->mdev->priv.eswitch;
1267         rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1268         uplink_priv = &rpriv->uplink_priv;
1269
1270         mutex_lock(&uplink_priv->unready_flows_lock);
1271         unready_flow_del(flow);
1272         mutex_unlock(&uplink_priv->unready_flows_lock);
1273 }
1274
1275 static int
1276 mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
1277                       struct mlx5e_tc_flow *flow,
1278                       struct netlink_ext_ack *extack)
1279 {
1280         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1281         struct mlx5_esw_flow_attr *attr = flow->esw_attr;
1282         struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
1283         struct net_device *out_dev, *encap_dev = NULL;
1284         struct mlx5_fc *counter = NULL;
1285         struct mlx5e_rep_priv *rpriv;
1286         struct mlx5e_priv *out_priv;
1287         bool encap_valid = true;
1288         u32 max_prio, max_chain;
1289         int err = 0;
1290         int out_index;
1291
1292         if (!mlx5_esw_chains_prios_supported(esw) && attr->prio != 1) {
1293                 NL_SET_ERR_MSG_MOD(extack,
1294                                    "E-switch priorities unsupported, upgrade FW");
1295                 return -EOPNOTSUPP;
1296         }
1297
1298         /* We check chain range only for tc flows.
1299          * For ft flows, we checked attr->chain was originally 0 and set it to
1300          * FDB_FT_CHAIN which is outside tc range.
1301          * See mlx5e_rep_setup_ft_cb().
1302          */
1303         max_chain = mlx5_esw_chains_get_chain_range(esw);
1304         if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) {
1305                 NL_SET_ERR_MSG_MOD(extack,
1306                                    "Requested chain is out of supported range");
1307                 return -EOPNOTSUPP;
1308         }
1309
1310         max_prio = mlx5_esw_chains_get_prio_range(esw);
1311         if (attr->prio > max_prio) {
1312                 NL_SET_ERR_MSG_MOD(extack,
1313                                    "Requested priority is out of supported range");
1314                 return -EOPNOTSUPP;
1315         }
1316
1317         if (flow_flag_test(flow, L3_TO_L2_DECAP)) {
1318                 err = mlx5e_attach_decap(priv, flow, extack);
1319                 if (err)
1320                         return err;
1321         }
1322
1323         for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
1324                 int mirred_ifindex;
1325
1326                 if (!(attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
1327                         continue;
1328
1329                 mirred_ifindex = parse_attr->mirred_ifindex[out_index];
1330                 out_dev = __dev_get_by_index(dev_net(priv->netdev),
1331                                              mirred_ifindex);
1332                 err = mlx5e_attach_encap(priv, flow, out_dev, out_index,
1333                                          extack, &encap_dev, &encap_valid);
1334                 if (err)
1335                         return err;
1336
1337                 out_priv = netdev_priv(encap_dev);
1338                 rpriv = out_priv->ppriv;
1339                 attr->dests[out_index].rep = rpriv->rep;
1340                 attr->dests[out_index].mdev = out_priv->mdev;
1341         }
1342
1343         err = mlx5_eswitch_add_vlan_action(esw, attr);
1344         if (err)
1345                 return err;
1346
1347         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
1348             !(attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR)) {
1349                 err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
1350                 dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
1351                 if (err)
1352                         return err;
1353         }
1354
1355         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1356                 counter = mlx5_fc_create(attr->counter_dev, true);
1357                 if (IS_ERR(counter))
1358                         return PTR_ERR(counter);
1359
1360                 attr->counter = counter;
1361         }
1362
1363         /* we get here if one of the following takes place:
1364          * (1) there's no error
1365          * (2) there's an encap action and we don't have valid neigh
1366          */
1367         if (!encap_valid)
1368                 flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec);
1369         else
1370                 flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);
1371
1372         if (IS_ERR(flow->rule[0]))
1373                 return PTR_ERR(flow->rule[0]);
1374         else
1375                 flow_flag_set(flow, OFFLOADED);
1376
1377         return 0;
1378 }
1379
1380 static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
1381 {
1382         struct mlx5_flow_spec *spec = &flow->esw_attr->parse_attr->spec;
1383         void *headers_v = MLX5_ADDR_OF(fte_match_param,
1384                                        spec->match_value,
1385                                        misc_parameters_3);
1386         u32 geneve_tlv_opt_0_data = MLX5_GET(fte_match_set_misc3,
1387                                              headers_v,
1388                                              geneve_tlv_option_0_data);
1389
1390         return !!geneve_tlv_opt_0_data;
1391 }
1392
1393 static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
1394                                   struct mlx5e_tc_flow *flow)
1395 {
1396         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1397         struct mlx5_esw_flow_attr *attr = flow->esw_attr;
1398         int out_index;
1399
1400         mlx5e_put_flow_tunnel_id(flow);
1401
1402         if (flow_flag_test(flow, NOT_READY)) {
1403                 remove_unready_flow(flow);
1404                 kvfree(attr->parse_attr);
1405                 return;
1406         }
1407
1408         if (mlx5e_is_offloaded_flow(flow)) {
1409                 if (flow_flag_test(flow, SLOW))
1410                         mlx5e_tc_unoffload_from_slow_path(esw, flow);
1411                 else
1412                         mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
1413         }
1414
1415         if (mlx5_flow_has_geneve_opt(flow))
1416                 mlx5_geneve_tlv_option_del(priv->mdev->geneve);
1417
1418         mlx5_eswitch_del_vlan_action(esw, attr);
1419
1420         for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
1421                 if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) {
1422                         mlx5e_detach_encap(priv, flow, out_index);
1423                         kfree(attr->parse_attr->tun_info[out_index]);
1424                 }
1425         kvfree(attr->parse_attr);
1426
1427         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
1428                 mlx5e_detach_mod_hdr(priv, flow);
1429
1430         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
1431                 mlx5_fc_destroy(attr->counter_dev, attr->counter);
1432
1433         if (flow_flag_test(flow, L3_TO_L2_DECAP))
1434                 mlx5e_detach_decap(priv, flow);
1435 }
1436
1437 void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
1438                               struct mlx5e_encap_entry *e,
1439                               struct list_head *flow_list)
1440 {
1441         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1442         struct mlx5_esw_flow_attr *esw_attr;
1443         struct mlx5_flow_handle *rule;
1444         struct mlx5_flow_spec *spec;
1445         struct mlx5e_tc_flow *flow;
1446         int err;
1447
1448         e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
1449                                                      e->reformat_type,
1450                                                      e->encap_size, e->encap_header,
1451                                                      MLX5_FLOW_NAMESPACE_FDB);
1452         if (IS_ERR(e->pkt_reformat)) {
1453                 mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
1454                                PTR_ERR(e->pkt_reformat));
1455                 return;
1456         }
1457         e->flags |= MLX5_ENCAP_ENTRY_VALID;
1458         mlx5e_rep_queue_neigh_stats_work(priv);
1459
1460         list_for_each_entry(flow, flow_list, tmp_list) {
1461                 bool all_flow_encaps_valid = true;
1462                 int i;
1463
1464                 if (!mlx5e_is_offloaded_flow(flow))
1465                         continue;
1466                 esw_attr = flow->esw_attr;
1467                 spec = &esw_attr->parse_attr->spec;
1468
1469                 esw_attr->dests[flow->tmp_efi_index].pkt_reformat = e->pkt_reformat;
1470                 esw_attr->dests[flow->tmp_efi_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
1471                 /* Flow can be associated with multiple encap entries.
1472                  * Before offloading the flow verify that all of them have
1473                  * a valid neighbour.
1474                  */
1475                 for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
1476                         if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
1477                                 continue;
1478                         if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
1479                                 all_flow_encaps_valid = false;
1480                                 break;
1481                         }
1482                 }
1483                 /* Do not offload flows with unresolved neighbors */
1484                 if (!all_flow_encaps_valid)
1485                         continue;
1486                 /* update from slow path rule to encap rule */
1487                 rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, esw_attr);
1488                 if (IS_ERR(rule)) {
1489                         err = PTR_ERR(rule);
1490                         mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
1491                                        err);
1492                         continue;
1493                 }
1494
1495                 mlx5e_tc_unoffload_from_slow_path(esw, flow);
1496                 flow->rule[0] = rule;
1497                 /* was unset when slow path rule removed */
1498                 flow_flag_set(flow, OFFLOADED);
1499         }
1500 }
1501
1502 void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
1503                               struct mlx5e_encap_entry *e,
1504                               struct list_head *flow_list)
1505 {
1506         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1507         struct mlx5_flow_handle *rule;
1508         struct mlx5_flow_spec *spec;
1509         struct mlx5e_tc_flow *flow;
1510         int err;
1511
1512         list_for_each_entry(flow, flow_list, tmp_list) {
1513                 if (!mlx5e_is_offloaded_flow(flow))
1514                         continue;
1515                 spec = &flow->esw_attr->parse_attr->spec;
1516
1517                 /* update from encap rule to slow path rule */
1518                 rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
1519                 /* mark the flow's encap dest as non-valid */
1520                 flow->esw_attr->dests[flow->tmp_efi_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
1521
1522                 if (IS_ERR(rule)) {
1523                         err = PTR_ERR(rule);
1524                         mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
1525                                        err);
1526                         continue;
1527                 }
1528
1529                 mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->esw_attr);
1530                 flow->rule[0] = rule;
1531                 /* was unset when fast path rule removed */
1532                 flow_flag_set(flow, OFFLOADED);
1533         }
1534
1535         /* we know that the encap is valid */
1536         e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
1537         mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
1538 }
1539
1540 static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
1541 {
1542         if (mlx5e_is_eswitch_flow(flow))
1543                 return flow->esw_attr->counter;
1544         else
1545                 return flow->nic_attr->counter;
1546 }
1547
1548 /* Takes reference to all flows attached to encap and adds the flows to
1549  * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
1550  */
1551 void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
1552 {
1553         struct encap_flow_item *efi;
1554         struct mlx5e_tc_flow *flow;
1555
1556         list_for_each_entry(efi, &e->flows, list) {
1557                 flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
1558                 if (IS_ERR(mlx5e_flow_get(flow)))
1559                         continue;
1560                 wait_for_completion(&flow->init_done);
1561
1562                 flow->tmp_efi_index = efi->index;
1563                 list_add(&flow->tmp_list, flow_list);
1564         }
1565 }
1566
1567 /* Iterate over tmp_list of flows attached to flow_list head. */
1568 void mlx5e_put_encap_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list)
1569 {
1570         struct mlx5e_tc_flow *flow, *tmp;
1571
1572         list_for_each_entry_safe(flow, tmp, flow_list, tmp_list)
1573                 mlx5e_flow_put(priv, flow);
1574 }
1575
1576 static struct mlx5e_encap_entry *
1577 mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
1578                            struct mlx5e_encap_entry *e)
1579 {
1580         struct mlx5e_encap_entry *next = NULL;
1581
1582 retry:
1583         rcu_read_lock();
1584
1585         /* find encap with non-zero reference counter value */
1586         for (next = e ?
1587                      list_next_or_null_rcu(&nhe->encap_list,
1588                                            &e->encap_list,
1589                                            struct mlx5e_encap_entry,
1590                                            encap_list) :
1591                      list_first_or_null_rcu(&nhe->encap_list,
1592                                             struct mlx5e_encap_entry,
1593                                             encap_list);
1594              next;
1595              next = list_next_or_null_rcu(&nhe->encap_list,
1596                                           &next->encap_list,
1597                                           struct mlx5e_encap_entry,
1598                                           encap_list))
1599                 if (mlx5e_encap_take(next))
1600                         break;
1601
1602         rcu_read_unlock();
1603
1604         /* release starting encap */
1605         if (e)
1606                 mlx5e_encap_put(netdev_priv(e->out_dev), e);
1607         if (!next)
1608                 return next;
1609
1610         /* wait for encap to be fully initialized */
1611         wait_for_completion(&next->res_ready);
1612         /* continue searching if encap entry is not in valid state after completion */
1613         if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) {
1614                 e = next;
1615                 goto retry;
1616         }
1617
1618         return next;
1619 }
1620
1621 void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
1622 {
1623         struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
1624         struct mlx5e_encap_entry *e = NULL;
1625         struct mlx5e_tc_flow *flow;
1626         struct mlx5_fc *counter;
1627         struct neigh_table *tbl;
1628         bool neigh_used = false;
1629         struct neighbour *n;
1630         u64 lastuse;
1631
1632         if (m_neigh->family == AF_INET)
1633                 tbl = &arp_tbl;
1634 #if IS_ENABLED(CONFIG_IPV6)
1635         else if (m_neigh->family == AF_INET6)
1636                 tbl = ipv6_stub->nd_tbl;
1637 #endif
1638         else
1639                 return;
1640
1641         /* mlx5e_get_next_valid_encap() releases previous encap before returning
1642          * next one.
1643          */
1644         while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
1645                 struct mlx5e_priv *priv = netdev_priv(e->out_dev);
1646                 struct encap_flow_item *efi, *tmp;
1647                 struct mlx5_eswitch *esw;
1648                 LIST_HEAD(flow_list);
1649
1650                 esw = priv->mdev->priv.eswitch;
1651                 mutex_lock(&esw->offloads.encap_tbl_lock);
1652                 list_for_each_entry_safe(efi, tmp, &e->flows, list) {
1653                         flow = container_of(efi, struct mlx5e_tc_flow,
1654                                             encaps[efi->index]);
1655                         if (IS_ERR(mlx5e_flow_get(flow)))
1656                                 continue;
1657                         list_add(&flow->tmp_list, &flow_list);
1658
1659                         if (mlx5e_is_offloaded_flow(flow)) {
1660                                 counter = mlx5e_tc_get_counter(flow);
1661                                 lastuse = mlx5_fc_query_lastuse(counter);
1662                                 if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
1663                                         neigh_used = true;
1664                                         break;
1665                                 }
1666                         }
1667                 }
1668                 mutex_unlock(&esw->offloads.encap_tbl_lock);
1669
1670                 mlx5e_put_encap_flow_list(priv, &flow_list);
1671                 if (neigh_used) {
1672                         /* release current encap before breaking the loop */
1673                         mlx5e_encap_put(priv, e);
1674                         break;
1675                 }
1676         }
1677
1678         trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);
1679
1680         if (neigh_used) {
1681                 nhe->reported_lastuse = jiffies;
1682
1683                 /* find the relevant neigh according to the cached device and
1684                  * dst ip pair
1685                  */
1686                 n = neigh_lookup(tbl, &m_neigh->dst_ip, m_neigh->dev);
1687                 if (!n)
1688                         return;
1689
1690                 neigh_event_send(n, NULL);
1691                 neigh_release(n);
1692         }
1693 }
1694
1695 static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
1696 {
1697         WARN_ON(!list_empty(&e->flows));
1698
1699         if (e->compl_result > 0) {
1700                 mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
1701
1702                 if (e->flags & MLX5_ENCAP_ENTRY_VALID)
1703                         mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
1704         }
1705
1706         kfree(e->tun_info);
1707         kfree(e->encap_header);
1708         kfree_rcu(e, rcu);
1709 }
1710
1711 static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
1712                                 struct mlx5e_decap_entry *d)
1713 {
1714         WARN_ON(!list_empty(&d->flows));
1715
1716         if (!d->compl_result)
1717                 mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);
1718
1719         kfree_rcu(d, rcu);
1720 }
1721
1722 void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
1723 {
1724         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1725
1726         if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
1727                 return;
1728         hash_del_rcu(&e->encap_hlist);
1729         mutex_unlock(&esw->offloads.encap_tbl_lock);
1730
1731         mlx5e_encap_dealloc(priv, e);
1732 }
1733
1734 static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
1735 {
1736         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1737
1738         if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
1739                 return;
1740         hash_del_rcu(&d->hlist);
1741         mutex_unlock(&esw->offloads.decap_tbl_lock);
1742
1743         mlx5e_decap_dealloc(priv, d);
1744 }
1745
1746 static void mlx5e_detach_encap(struct mlx5e_priv *priv,
1747                                struct mlx5e_tc_flow *flow, int out_index)
1748 {
1749         struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
1750         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1751
1752         /* flow wasn't fully initialized */
1753         if (!e)
1754                 return;
1755
1756         mutex_lock(&esw->offloads.encap_tbl_lock);
1757         list_del(&flow->encaps[out_index].list);
1758         flow->encaps[out_index].e = NULL;
1759         if (!refcount_dec_and_test(&e->refcnt)) {
1760                 mutex_unlock(&esw->offloads.encap_tbl_lock);
1761                 return;
1762         }
1763         hash_del_rcu(&e->encap_hlist);
1764         mutex_unlock(&esw->offloads.encap_tbl_lock);
1765
1766         mlx5e_encap_dealloc(priv, e);
1767 }
1768
1769 static void mlx5e_detach_decap(struct mlx5e_priv *priv,
1770                                struct mlx5e_tc_flow *flow)
1771 {
1772         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1773         struct mlx5e_decap_entry *d = flow->decap_reformat;
1774
1775         if (!d)
1776                 return;
1777
1778         mutex_lock(&esw->offloads.decap_tbl_lock);
1779         list_del(&flow->l3_to_l2_reformat);
1780         flow->decap_reformat = NULL;
1781
1782         if (!refcount_dec_and_test(&d->refcnt)) {
1783                 mutex_unlock(&esw->offloads.decap_tbl_lock);
1784                 return;
1785         }
1786         hash_del_rcu(&d->hlist);
1787         mutex_unlock(&esw->offloads.decap_tbl_lock);
1788
1789         mlx5e_decap_dealloc(priv, d);
1790 }
1791
1792 static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
1793 {
1794         struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
1795
1796         if (!flow_flag_test(flow, ESWITCH) ||
1797             !flow_flag_test(flow, DUP))
1798                 return;
1799
1800         mutex_lock(&esw->offloads.peer_mutex);
1801         list_del(&flow->peer);
1802         mutex_unlock(&esw->offloads.peer_mutex);
1803
1804         flow_flag_clear(flow, DUP);
1805
1806         if (refcount_dec_and_test(&flow->peer_flow->refcnt)) {
1807                 mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
1808                 kfree(flow->peer_flow);
1809         }
1810
1811         flow->peer_flow = NULL;
1812 }
1813
1814 static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
1815 {
1816         struct mlx5_core_dev *dev = flow->priv->mdev;
1817         struct mlx5_devcom *devcom = dev->priv.devcom;
1818         struct mlx5_eswitch *peer_esw;
1819
1820         peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
1821         if (!peer_esw)
1822                 return;
1823
1824         __mlx5e_tc_del_fdb_peer_flow(flow);
1825         mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
1826 }
1827
1828 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
1829                               struct mlx5e_tc_flow *flow)
1830 {
1831         if (mlx5e_is_eswitch_flow(flow)) {
1832                 mlx5e_tc_del_fdb_peer_flow(flow);
1833                 mlx5e_tc_del_fdb_flow(priv, flow);
1834         } else {
1835                 mlx5e_tc_del_nic_flow(priv, flow);
1836         }
1837 }
1838
1839 static int flow_has_tc_fwd_action(struct flow_cls_offload *f)
1840 {
1841         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1842         struct flow_action *flow_action = &rule->action;
1843         const struct flow_action_entry *act;
1844         int i;
1845
1846         flow_action_for_each(i, act, flow_action) {
1847                 switch (act->id) {
1848                 case FLOW_ACTION_GOTO:
1849                         return true;
1850                 default:
1851                         continue;
1852                 }
1853         }
1854
1855         return false;
1856 }
1857
1858 static int
1859 enc_opts_is_dont_care_or_full_match(struct mlx5e_priv *priv,
1860                                     struct flow_dissector_key_enc_opts *opts,
1861                                     struct netlink_ext_ack *extack,
1862                                     bool *dont_care)
1863 {
1864         struct geneve_opt *opt;
1865         int off = 0;
1866
1867         *dont_care = true;
1868
1869         while (opts->len > off) {
1870                 opt = (struct geneve_opt *)&opts->data[off];
1871
1872                 if (!(*dont_care) || opt->opt_class || opt->type ||
1873                     memchr_inv(opt->opt_data, 0, opt->length * 4)) {
1874                         *dont_care = false;
1875
1876                         if (opt->opt_class != U16_MAX ||
1877                             opt->type != U8_MAX) {
1878                                 NL_SET_ERR_MSG(extack,
1879                                                "Partial match of tunnel options in chain > 0 isn't supported");
1880                                 netdev_warn(priv->netdev,
1881                                             "Partial match of tunnel options in chain > 0 isn't supported");
1882                                 return -EOPNOTSUPP;
1883                         }
1884                 }
1885
1886                 off += sizeof(struct geneve_opt) + opt->length * 4;
1887         }
1888
1889         return 0;
1890 }
1891
1892 #define COPY_DISSECTOR(rule, diss_key, dst)\
1893 ({ \
1894         struct flow_rule *__rule = (rule);\
1895         typeof(dst) __dst = dst;\
1896 \
1897         memcpy(__dst,\
1898                skb_flow_dissector_target(__rule->match.dissector,\
1899                                          diss_key,\
1900                                          __rule->match.key),\
1901                sizeof(*__dst));\
1902 })
1903
1904 static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv,
1905                                     struct mlx5e_tc_flow *flow,
1906                                     struct flow_cls_offload *f,
1907                                     struct net_device *filter_dev)
1908 {
1909         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1910         struct netlink_ext_ack *extack = f->common.extack;
1911         struct mlx5_esw_flow_attr *attr = flow->esw_attr;
1912         struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
1913         struct flow_match_enc_opts enc_opts_match;
1914         struct tunnel_match_enc_opts tun_enc_opts;
1915         struct mlx5_rep_uplink_priv *uplink_priv;
1916         struct mlx5e_rep_priv *uplink_rpriv;
1917         struct tunnel_match_key tunnel_key;
1918         bool enc_opts_is_dont_care = true;
1919         u32 tun_id, enc_opts_id = 0;
1920         struct mlx5_eswitch *esw;
1921         u32 value, mask;
1922         int err;
1923
1924         esw = priv->mdev->priv.eswitch;
1925         uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1926         uplink_priv = &uplink_rpriv->uplink_priv;
1927
1928         memset(&tunnel_key, 0, sizeof(tunnel_key));
1929         COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL,
1930                        &tunnel_key.enc_control);
1931         if (tunnel_key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS)
1932                 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
1933                                &tunnel_key.enc_ipv4);
1934         else
1935                 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
1936                                &tunnel_key.enc_ipv6);
1937         COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IP, &tunnel_key.enc_ip);
1938         COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_PORTS,
1939                        &tunnel_key.enc_tp);
1940         COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_KEYID,
1941                        &tunnel_key.enc_key_id);
1942         tunnel_key.filter_ifindex = filter_dev->ifindex;
1943
1944         err = mapping_add(uplink_priv->tunnel_mapping, &tunnel_key, &tun_id);
1945         if (err)
1946                 return err;
1947
1948         flow_rule_match_enc_opts(rule, &enc_opts_match);
1949         err = enc_opts_is_dont_care_or_full_match(priv,
1950                                                   enc_opts_match.mask,
1951                                                   extack,
1952                                                   &enc_opts_is_dont_care);
1953         if (err)
1954                 goto err_enc_opts;
1955
1956         if (!enc_opts_is_dont_care) {
1957                 memset(&tun_enc_opts, 0, sizeof(tun_enc_opts));
1958                 memcpy(&tun_enc_opts.key, enc_opts_match.key,
1959                        sizeof(*enc_opts_match.key));
1960                 memcpy(&tun_enc_opts.mask, enc_opts_match.mask,
1961                        sizeof(*enc_opts_match.mask));
1962
1963                 err = mapping_add(uplink_priv->tunnel_enc_opts_mapping,
1964                                   &tun_enc_opts, &enc_opts_id);
1965                 if (err)
1966                         goto err_enc_opts;
1967         }
1968
1969         value = tun_id << ENC_OPTS_BITS | enc_opts_id;
1970         mask = enc_opts_id ? TUNNEL_ID_MASK :
1971                              (TUNNEL_ID_MASK & ~ENC_OPTS_BITS_MASK);
1972
1973         if (attr->chain) {
1974                 mlx5e_tc_match_to_reg_match(&attr->parse_attr->spec,
1975                                             TUNNEL_TO_REG, value, mask);
1976         } else {
1977                 mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
1978                 err = mlx5e_tc_match_to_reg_set(priv->mdev,
1979                                                 mod_hdr_acts,
1980                                                 TUNNEL_TO_REG, value);
1981                 if (err)
1982                         goto err_set;
1983
1984                 attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1985         }
1986
1987         flow->tunnel_id = value;
1988         return 0;
1989
1990 err_set:
1991         if (enc_opts_id)
1992                 mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
1993                                enc_opts_id);
1994 err_enc_opts:
1995         mapping_remove(uplink_priv->tunnel_mapping, tun_id);
1996         return err;
1997 }
1998
1999 static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow)
2000 {
2001         u32 enc_opts_id = flow->tunnel_id & ENC_OPTS_BITS_MASK;
2002         u32 tun_id = flow->tunnel_id >> ENC_OPTS_BITS;
2003         struct mlx5_rep_uplink_priv *uplink_priv;
2004         struct mlx5e_rep_priv *uplink_rpriv;
2005         struct mlx5_eswitch *esw;
2006
2007         esw = flow->priv->mdev->priv.eswitch;
2008         uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
2009         uplink_priv = &uplink_rpriv->uplink_priv;
2010
2011         if (tun_id)
2012                 mapping_remove(uplink_priv->tunnel_mapping, tun_id);
2013         if (enc_opts_id)
2014                 mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
2015                                enc_opts_id);
2016 }
2017
2018 u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow)
2019 {
2020         return flow->tunnel_id;
2021 }
2022
2023 void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev,
2024                             struct flow_match_basic *match, bool outer,
2025                             void *headers_c, void *headers_v)
2026 {
2027         bool ip_version_cap;
2028
2029         ip_version_cap = outer ?
2030                 MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2031                                           ft_field_support.outer_ip_version) :
2032                 MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2033                                           ft_field_support.inner_ip_version);
2034
2035         if (ip_version_cap && match->mask->n_proto == htons(0xFFFF) &&
2036             (match->key->n_proto == htons(ETH_P_IP) ||
2037              match->key->n_proto == htons(ETH_P_IPV6))) {
2038                 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_version);
2039                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version,
2040                          match->key->n_proto == htons(ETH_P_IP) ? 4 : 6);
2041         } else {
2042                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
2043                          ntohs(match->mask->n_proto));
2044                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
2045                          ntohs(match->key->n_proto));
2046         }
2047 }
2048
2049 static int parse_tunnel_attr(struct mlx5e_priv *priv,
2050                              struct mlx5e_tc_flow *flow,
2051                              struct mlx5_flow_spec *spec,
2052                              struct flow_cls_offload *f,
2053                              struct net_device *filter_dev,
2054                              u8 *match_level,
2055                              bool *match_inner)
2056 {
2057         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
2058         struct netlink_ext_ack *extack = f->common.extack;
2059         bool needs_mapping, sets_mapping;
2060         int err;
2061
2062         if (!mlx5e_is_eswitch_flow(flow))
2063                 return -EOPNOTSUPP;
2064
2065         needs_mapping = !!flow->esw_attr->chain;
2066         sets_mapping = !flow->esw_attr->chain && flow_has_tc_fwd_action(f);
2067         *match_inner = !needs_mapping;
2068
2069         if ((needs_mapping || sets_mapping) &&
2070             !mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
2071                 NL_SET_ERR_MSG(extack,
2072                                "Chains on tunnel devices isn't supported without register loopback support");
2073                 netdev_warn(priv->netdev,
2074                             "Chains on tunnel devices isn't supported without register loopback support");
2075                 return -EOPNOTSUPP;
2076         }
2077
2078         if (!flow->esw_attr->chain) {
2079                 err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
2080                                          match_level);
2081                 if (err) {
2082                         NL_SET_ERR_MSG_MOD(extack,
2083                                            "Failed to parse tunnel attributes");
2084                         netdev_warn(priv->netdev,
2085                                     "Failed to parse tunnel attributes");
2086                         return err;
2087                 }
2088
2089                 /* With mpls over udp we decapsulate using packet reformat
2090                  * object
2091                  */
2092                 if (!netif_is_bareudp(filter_dev))
2093                         flow->esw_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
2094         }
2095
2096         if (!needs_mapping && !sets_mapping)
2097                 return 0;
2098
2099         return mlx5e_get_flow_tunnel_id(priv, flow, f, filter_dev);
2100 }
2101
2102 static void *get_match_inner_headers_criteria(struct mlx5_flow_spec *spec)
2103 {
2104         return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2105                             inner_headers);
2106 }
2107
2108 static void *get_match_inner_headers_value(struct mlx5_flow_spec *spec)
2109 {
2110         return MLX5_ADDR_OF(fte_match_param, spec->match_value,
2111                             inner_headers);
2112 }
2113
2114 static void *get_match_outer_headers_criteria(struct mlx5_flow_spec *spec)
2115 {
2116         return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2117                             outer_headers);
2118 }
2119
2120 static void *get_match_outer_headers_value(struct mlx5_flow_spec *spec)
2121 {
2122         return MLX5_ADDR_OF(fte_match_param, spec->match_value,
2123                             outer_headers);
2124 }
2125
2126 static void *get_match_headers_value(u32 flags,
2127                                      struct mlx5_flow_spec *spec)
2128 {
2129         return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
2130                 get_match_inner_headers_value(spec) :
2131                 get_match_outer_headers_value(spec);
2132 }
2133
2134 static void *get_match_headers_criteria(u32 flags,
2135                                         struct mlx5_flow_spec *spec)
2136 {
2137         return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
2138                 get_match_inner_headers_criteria(spec) :
2139                 get_match_outer_headers_criteria(spec);
2140 }
2141
2142 static int mlx5e_flower_parse_meta(struct net_device *filter_dev,
2143                                    struct flow_cls_offload *f)
2144 {
2145         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2146         struct netlink_ext_ack *extack = f->common.extack;
2147         struct net_device *ingress_dev;
2148         struct flow_match_meta match;
2149
2150         if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META))
2151                 return 0;
2152
2153         flow_rule_match_meta(rule, &match);
2154         if (match.mask->ingress_ifindex != 0xFFFFFFFF) {
2155                 NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask");
2156                 return -EINVAL;
2157         }
2158
2159         ingress_dev = __dev_get_by_index(dev_net(filter_dev),
2160                                          match.key->ingress_ifindex);
2161         if (!ingress_dev) {
2162                 NL_SET_ERR_MSG_MOD(extack,
2163                                    "Can't find the ingress port to match on");
2164                 return -EINVAL;
2165         }
2166
2167         if (ingress_dev != filter_dev) {
2168                 NL_SET_ERR_MSG_MOD(extack,
2169                                    "Can't match on the ingress filter port");
2170                 return -EINVAL;
2171         }
2172
2173         return 0;
2174 }
2175
2176 static bool skip_key_basic(struct net_device *filter_dev,
2177                            struct flow_cls_offload *f)
2178 {
2179         /* When doing mpls over udp decap, the user needs to provide
2180          * MPLS_UC as the protocol in order to be able to match on mpls
2181          * label fields.  However, the actual ethertype is IP so we want to
2182          * avoid matching on this, otherwise we'll fail the match.
2183          */
2184         if (netif_is_bareudp(filter_dev) && f->common.chain_index == 0)
2185                 return true;
2186
2187         return false;
2188 }
2189
2190 static int __parse_cls_flower(struct mlx5e_priv *priv,
2191                               struct mlx5e_tc_flow *flow,
2192                               struct mlx5_flow_spec *spec,
2193                               struct flow_cls_offload *f,
2194                               struct net_device *filter_dev,
2195                               u8 *inner_match_level, u8 *outer_match_level)
2196 {
2197         struct netlink_ext_ack *extack = f->common.extack;
2198         void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2199                                        outer_headers);
2200         void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2201                                        outer_headers);
2202         void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2203                                     misc_parameters);
2204         void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2205                                     misc_parameters);
2206         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2207         struct flow_dissector *dissector = rule->match.dissector;
2208         u16 addr_type = 0;
2209         u8 ip_proto = 0;
2210         u8 *match_level;
2211         int err;
2212
2213         match_level = outer_match_level;
2214
2215         if (dissector->used_keys &
2216             ~(BIT(FLOW_DISSECTOR_KEY_META) |
2217               BIT(FLOW_DISSECTOR_KEY_CONTROL) |
2218               BIT(FLOW_DISSECTOR_KEY_BASIC) |
2219               BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
2220               BIT(FLOW_DISSECTOR_KEY_VLAN) |
2221               BIT(FLOW_DISSECTOR_KEY_CVLAN) |
2222               BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
2223               BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
2224               BIT(FLOW_DISSECTOR_KEY_PORTS) |
2225               BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
2226               BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
2227               BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
2228               BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) |
2229               BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
2230               BIT(FLOW_DISSECTOR_KEY_TCP) |
2231               BIT(FLOW_DISSECTOR_KEY_IP)  |
2232               BIT(FLOW_DISSECTOR_KEY_CT) |
2233               BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
2234               BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) |
2235               BIT(FLOW_DISSECTOR_KEY_MPLS))) {
2236                 NL_SET_ERR_MSG_MOD(extack, "Unsupported key");
2237                 netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n",
2238                             dissector->used_keys);
2239                 return -EOPNOTSUPP;
2240         }
2241
2242         if (mlx5e_get_tc_tun(filter_dev)) {
2243                 bool match_inner = false;
2244
2245                 err = parse_tunnel_attr(priv, flow, spec, f, filter_dev,
2246                                         outer_match_level, &match_inner);
2247                 if (err)
2248                         return err;
2249
2250                 if (match_inner) {
2251                         /* header pointers should point to the inner headers
2252                          * if the packet was decapsulated already.
2253                          * outer headers are set by parse_tunnel_attr.
2254                          */
2255                         match_level = inner_match_level;
2256                         headers_c = get_match_inner_headers_criteria(spec);
2257                         headers_v = get_match_inner_headers_value(spec);
2258                 }
2259         }
2260
2261         err = mlx5e_flower_parse_meta(filter_dev, f);
2262         if (err)
2263                 return err;
2264
2265         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC) &&
2266             !skip_key_basic(filter_dev, f)) {
2267                 struct flow_match_basic match;
2268
2269                 flow_rule_match_basic(rule, &match);
2270                 mlx5e_tc_set_ethertype(priv->mdev, &match,
2271                                        match_level == outer_match_level,
2272                                        headers_c, headers_v);
2273
2274                 if (match.mask->n_proto)
2275                         *match_level = MLX5_MATCH_L2;
2276         }
2277         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN) ||
2278             is_vlan_dev(filter_dev)) {
2279                 struct flow_dissector_key_vlan filter_dev_mask;
2280                 struct flow_dissector_key_vlan filter_dev_key;
2281                 struct flow_match_vlan match;
2282
2283                 if (is_vlan_dev(filter_dev)) {
2284                         match.key = &filter_dev_key;
2285                         match.key->vlan_id = vlan_dev_vlan_id(filter_dev);
2286                         match.key->vlan_tpid = vlan_dev_vlan_proto(filter_dev);
2287                         match.key->vlan_priority = 0;
2288                         match.mask = &filter_dev_mask;
2289                         memset(match.mask, 0xff, sizeof(*match.mask));
2290                         match.mask->vlan_priority = 0;
2291                 } else {
2292                         flow_rule_match_vlan(rule, &match);
2293                 }
2294                 if (match.mask->vlan_id ||
2295                     match.mask->vlan_priority ||
2296                     match.mask->vlan_tpid) {
2297                         if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
2298                                 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2299                                          svlan_tag, 1);
2300                                 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2301                                          svlan_tag, 1);
2302                         } else {
2303                                 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2304                                          cvlan_tag, 1);
2305                                 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2306                                          cvlan_tag, 1);
2307                         }
2308
2309                         MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid,
2310                                  match.mask->vlan_id);
2311                         MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid,
2312                                  match.key->vlan_id);
2313
2314                         MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio,
2315                                  match.mask->vlan_priority);
2316                         MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio,
2317                                  match.key->vlan_priority);
2318
2319                         *match_level = MLX5_MATCH_L2;
2320                 }
2321         } else if (*match_level != MLX5_MATCH_NONE) {
2322                 /* cvlan_tag enabled in match criteria and
2323                  * disabled in match value means both S & C tags
2324                  * don't exist (untagged of both)
2325                  */
2326                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
2327                 *match_level = MLX5_MATCH_L2;
2328         }
2329
2330         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
2331                 struct flow_match_vlan match;
2332
2333                 flow_rule_match_cvlan(rule, &match);
2334                 if (match.mask->vlan_id ||
2335                     match.mask->vlan_priority ||
2336                     match.mask->vlan_tpid) {
2337                         if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
2338                                 MLX5_SET(fte_match_set_misc, misc_c,
2339                                          outer_second_svlan_tag, 1);
2340                                 MLX5_SET(fte_match_set_misc, misc_v,
2341                                          outer_second_svlan_tag, 1);
2342                         } else {
2343                                 MLX5_SET(fte_match_set_misc, misc_c,
2344                                          outer_second_cvlan_tag, 1);
2345                                 MLX5_SET(fte_match_set_misc, misc_v,
2346                                          outer_second_cvlan_tag, 1);
2347                         }
2348
2349                         MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid,
2350                                  match.mask->vlan_id);
2351                         MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid,
2352                                  match.key->vlan_id);
2353                         MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio,
2354                                  match.mask->vlan_priority);
2355                         MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio,
2356                                  match.key->vlan_priority);
2357
2358                         *match_level = MLX5_MATCH_L2;
2359                 }
2360         }
2361
2362         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
2363                 struct flow_match_eth_addrs match;
2364
2365                 flow_rule_match_eth_addrs(rule, &match);
2366                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2367                                              dmac_47_16),
2368                                 match.mask->dst);
2369                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2370                                              dmac_47_16),
2371                                 match.key->dst);
2372
2373                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2374                                              smac_47_16),
2375                                 match.mask->src);
2376                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2377                                              smac_47_16),
2378                                 match.key->src);
2379
2380                 if (!is_zero_ether_addr(match.mask->src) ||
2381                     !is_zero_ether_addr(match.mask->dst))
2382                         *match_level = MLX5_MATCH_L2;
2383         }
2384
2385         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
2386                 struct flow_match_control match;
2387
2388                 flow_rule_match_control(rule, &match);
2389                 addr_type = match.key->addr_type;
2390
2391                 /* the HW doesn't support frag first/later */
2392                 if (match.mask->flags & FLOW_DIS_FIRST_FRAG)
2393                         return -EOPNOTSUPP;
2394
2395                 if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) {
2396                         MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
2397                         MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
2398                                  match.key->flags & FLOW_DIS_IS_FRAGMENT);
2399
2400                         /* the HW doesn't need L3 inline to match on frag=no */
2401                         if (!(match.key->flags & FLOW_DIS_IS_FRAGMENT))
2402                                 *match_level = MLX5_MATCH_L2;
2403         /* ***  L2 attributes parsing up to here *** */
2404                         else
2405                                 *match_level = MLX5_MATCH_L3;
2406                 }
2407         }
2408
2409         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
2410                 struct flow_match_basic match;
2411
2412                 flow_rule_match_basic(rule, &match);
2413                 ip_proto = match.key->ip_proto;
2414
2415                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
2416                          match.mask->ip_proto);
2417                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
2418                          match.key->ip_proto);
2419
2420                 if (match.mask->ip_proto)
2421                         *match_level = MLX5_MATCH_L3;
2422         }
2423
2424         if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
2425                 struct flow_match_ipv4_addrs match;
2426
2427                 flow_rule_match_ipv4_addrs(rule, &match);
2428                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2429                                     src_ipv4_src_ipv6.ipv4_layout.ipv4),
2430                        &match.mask->src, sizeof(match.mask->src));
2431                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2432                                     src_ipv4_src_ipv6.ipv4_layout.ipv4),
2433                        &match.key->src, sizeof(match.key->src));
2434                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2435                                     dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2436                        &match.mask->dst, sizeof(match.mask->dst));
2437                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2438                                     dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2439                        &match.key->dst, sizeof(match.key->dst));
2440
2441                 if (match.mask->src || match.mask->dst)
2442                         *match_level = MLX5_MATCH_L3;
2443         }
2444
2445         if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
2446                 struct flow_match_ipv6_addrs match;
2447
2448                 flow_rule_match_ipv6_addrs(rule, &match);
2449                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2450                                     src_ipv4_src_ipv6.ipv6_layout.ipv6),
2451                        &match.mask->src, sizeof(match.mask->src));
2452                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2453                                     src_ipv4_src_ipv6.ipv6_layout.ipv6),
2454                        &match.key->src, sizeof(match.key->src));
2455
2456                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2457                                     dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2458                        &match.mask->dst, sizeof(match.mask->dst));
2459                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2460                                     dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2461                        &match.key->dst, sizeof(match.key->dst));
2462
2463                 if (ipv6_addr_type(&match.mask->src) != IPV6_ADDR_ANY ||
2464                     ipv6_addr_type(&match.mask->dst) != IPV6_ADDR_ANY)
2465                         *match_level = MLX5_MATCH_L3;
2466         }
2467
2468         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
2469                 struct flow_match_ip match;
2470
2471                 flow_rule_match_ip(rule, &match);
2472                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
2473                          match.mask->tos & 0x3);
2474                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
2475                          match.key->tos & 0x3);
2476
2477                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
2478                          match.mask->tos >> 2);
2479                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
2480                          match.key->tos  >> 2);
2481
2482                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
2483                          match.mask->ttl);
2484                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
2485                          match.key->ttl);
2486
2487                 if (match.mask->ttl &&
2488                     !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
2489                                                 ft_field_support.outer_ipv4_ttl)) {
2490                         NL_SET_ERR_MSG_MOD(extack,
2491                                            "Matching on TTL is not supported");
2492                         return -EOPNOTSUPP;
2493                 }
2494
2495                 if (match.mask->tos || match.mask->ttl)
2496                         *match_level = MLX5_MATCH_L3;
2497         }
2498
2499         /* ***  L3 attributes parsing up to here *** */
2500
2501         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
2502                 struct flow_match_ports match;
2503
2504                 flow_rule_match_ports(rule, &match);
2505                 switch (ip_proto) {
2506                 case IPPROTO_TCP:
2507                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2508                                  tcp_sport, ntohs(match.mask->src));
2509                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2510                                  tcp_sport, ntohs(match.key->src));
2511
2512                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2513                                  tcp_dport, ntohs(match.mask->dst));
2514                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2515                                  tcp_dport, ntohs(match.key->dst));
2516                         break;
2517
2518                 case IPPROTO_UDP:
2519                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2520                                  udp_sport, ntohs(match.mask->src));
2521                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2522                                  udp_sport, ntohs(match.key->src));
2523
2524                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2525                                  udp_dport, ntohs(match.mask->dst));
2526                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2527                                  udp_dport, ntohs(match.key->dst));
2528                         break;
2529                 default:
2530                         NL_SET_ERR_MSG_MOD(extack,
2531                                            "Only UDP and TCP transports are supported for L4 matching");
2532                         netdev_err(priv->netdev,
2533                                    "Only UDP and TCP transport are supported\n");
2534                         return -EINVAL;
2535                 }
2536
2537                 if (match.mask->src || match.mask->dst)
2538                         *match_level = MLX5_MATCH_L4;
2539         }
2540
2541         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
2542                 struct flow_match_tcp match;
2543
2544                 flow_rule_match_tcp(rule, &match);
2545                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
2546                          ntohs(match.mask->flags));
2547                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
2548                          ntohs(match.key->flags));
2549
2550                 if (match.mask->flags)
2551                         *match_level = MLX5_MATCH_L4;
2552         }
2553
2554         return 0;
2555 }
2556
2557 static int parse_cls_flower(struct mlx5e_priv *priv,
2558                             struct mlx5e_tc_flow *flow,
2559                             struct mlx5_flow_spec *spec,
2560                             struct flow_cls_offload *f,
2561                             struct net_device *filter_dev)
2562 {
2563         u8 inner_match_level, outer_match_level, non_tunnel_match_level;
2564         struct netlink_ext_ack *extack = f->common.extack;
2565         struct mlx5_core_dev *dev = priv->mdev;
2566         struct mlx5_eswitch *esw = dev->priv.eswitch;
2567         struct mlx5e_rep_priv *rpriv = priv->ppriv;
2568         struct mlx5_eswitch_rep *rep;
2569         bool is_eswitch_flow;
2570         int err;
2571
2572         inner_match_level = MLX5_MATCH_NONE;
2573         outer_match_level = MLX5_MATCH_NONE;
2574
2575         err = __parse_cls_flower(priv, flow, spec, f, filter_dev,
2576                                  &inner_match_level, &outer_match_level);
2577         non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ?
2578                                  outer_match_level : inner_match_level;
2579
2580         is_eswitch_flow = mlx5e_is_eswitch_flow(flow);
2581         if (!err && is_eswitch_flow) {
2582                 rep = rpriv->rep;
2583                 if (rep->vport != MLX5_VPORT_UPLINK &&
2584                     (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
2585                     esw->offloads.inline_mode < non_tunnel_match_level)) {
2586                         NL_SET_ERR_MSG_MOD(extack,
2587                                            "Flow is not offloaded due to min inline setting");
2588                         netdev_warn(priv->netdev,
2589                                     "Flow is not offloaded due to min inline setting, required %d actual %d\n",
2590                                     non_tunnel_match_level, esw->offloads.inline_mode);
2591                         return -EOPNOTSUPP;
2592                 }
2593         }
2594
2595         if (is_eswitch_flow) {
2596                 flow->esw_attr->inner_match_level = inner_match_level;
2597                 flow->esw_attr->outer_match_level = outer_match_level;
2598         } else {
2599                 flow->nic_attr->match_level = non_tunnel_match_level;
2600         }
2601
2602         return err;
2603 }
2604
2605 struct pedit_headers {
2606         struct ethhdr  eth;
2607         struct vlan_hdr vlan;
2608         struct iphdr   ip4;
2609         struct ipv6hdr ip6;
2610         struct tcphdr  tcp;
2611         struct udphdr  udp;
2612 };
2613
2614 struct pedit_headers_action {
2615         struct pedit_headers    vals;
2616         struct pedit_headers    masks;
2617         u32                     pedits;
2618 };
2619
2620 static int pedit_header_offsets[] = {
2621         [FLOW_ACT_MANGLE_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth),
2622         [FLOW_ACT_MANGLE_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4),
2623         [FLOW_ACT_MANGLE_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6),
2624         [FLOW_ACT_MANGLE_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp),
2625         [FLOW_ACT_MANGLE_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp),
2626 };
2627
2628 #define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype])
2629
2630 static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset,
2631                          struct pedit_headers_action *hdrs)
2632 {
2633         u32 *curr_pmask, *curr_pval;
2634
2635         curr_pmask = (u32 *)(pedit_header(&hdrs->masks, hdr_type) + offset);
2636         curr_pval  = (u32 *)(pedit_header(&hdrs->vals, hdr_type) + offset);
2637
2638         if (*curr_pmask & mask)  /* disallow acting twice on the same location */
2639                 goto out_err;
2640
2641         *curr_pmask |= mask;
2642         *curr_pval  |= (val & mask);
2643
2644         return 0;
2645
2646 out_err:
2647         return -EOPNOTSUPP;
2648 }
2649
2650 struct mlx5_fields {
2651         u8  field;
2652         u8  field_bsize;
2653         u32 field_mask;
2654         u32 offset;
2655         u32 match_offset;
2656 };
2657
2658 #define OFFLOAD(fw_field, field_bsize, field_mask, field, off, match_field) \
2659                 {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, field_bsize, field_mask, \
2660                  offsetof(struct pedit_headers, field) + (off), \
2661                  MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)}
2662
2663 /* masked values are the same and there are no rewrites that do not have a
2664  * match.
2665  */
2666 #define SAME_VAL_MASK(type, valp, maskp, matchvalp, matchmaskp) ({ \
2667         type matchmaskx = *(type *)(matchmaskp); \
2668         type matchvalx = *(type *)(matchvalp); \
2669         type maskx = *(type *)(maskp); \
2670         type valx = *(type *)(valp); \
2671         \
2672         (valx & maskx) == (matchvalx & matchmaskx) && !(maskx & (maskx ^ \
2673                                                                  matchmaskx)); \
2674 })
2675
2676 static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp,
2677                          void *matchmaskp, u8 bsize)
2678 {
2679         bool same = false;
2680
2681         switch (bsize) {
2682         case 8:
2683                 same = SAME_VAL_MASK(u8, valp, maskp, matchvalp, matchmaskp);
2684                 break;
2685         case 16:
2686                 same = SAME_VAL_MASK(u16, valp, maskp, matchvalp, matchmaskp);
2687                 break;
2688         case 32:
2689                 same = SAME_VAL_MASK(u32, valp, maskp, matchvalp, matchmaskp);
2690                 break;
2691         }
2692
2693         return same;
2694 }
2695
2696 static struct mlx5_fields fields[] = {
2697         OFFLOAD(DMAC_47_16, 32, U32_MAX, eth.h_dest[0], 0, dmac_47_16),
2698         OFFLOAD(DMAC_15_0,  16, U16_MAX, eth.h_dest[4], 0, dmac_15_0),
2699         OFFLOAD(SMAC_47_16, 32, U32_MAX, eth.h_source[0], 0, smac_47_16),
2700         OFFLOAD(SMAC_15_0,  16, U16_MAX, eth.h_source[4], 0, smac_15_0),
2701         OFFLOAD(ETHERTYPE,  16, U16_MAX, eth.h_proto, 0, ethertype),
2702         OFFLOAD(FIRST_VID,  16, U16_MAX, vlan.h_vlan_TCI, 0, first_vid),
2703
2704         OFFLOAD(IP_DSCP, 8,    0xfc, ip4.tos,   0, ip_dscp),
2705         OFFLOAD(IP_TTL,  8,  U8_MAX, ip4.ttl,   0, ttl_hoplimit),
2706         OFFLOAD(SIPV4,  32, U32_MAX, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4),
2707         OFFLOAD(DIPV4,  32, U32_MAX, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2708
2709         OFFLOAD(SIPV6_127_96, 32, U32_MAX, ip6.saddr.s6_addr32[0], 0,
2710                 src_ipv4_src_ipv6.ipv6_layout.ipv6[0]),
2711         OFFLOAD(SIPV6_95_64,  32, U32_MAX, ip6.saddr.s6_addr32[1], 0,
2712                 src_ipv4_src_ipv6.ipv6_layout.ipv6[4]),
2713         OFFLOAD(SIPV6_63_32,  32, U32_MAX, ip6.saddr.s6_addr32[2], 0,
2714                 src_ipv4_src_ipv6.ipv6_layout.ipv6[8]),
2715         OFFLOAD(SIPV6_31_0,   32, U32_MAX, ip6.saddr.s6_addr32[3], 0,
2716                 src_ipv4_src_ipv6.ipv6_layout.ipv6[12]),
2717         OFFLOAD(DIPV6_127_96, 32, U32_MAX, ip6.daddr.s6_addr32[0], 0,
2718                 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[0]),
2719         OFFLOAD(DIPV6_95_64,  32, U32_MAX, ip6.daddr.s6_addr32[1], 0,
2720                 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[4]),
2721         OFFLOAD(DIPV6_63_32,  32, U32_MAX, ip6.daddr.s6_addr32[2], 0,
2722                 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[8]),
2723         OFFLOAD(DIPV6_31_0,   32, U32_MAX, ip6.daddr.s6_addr32[3], 0,
2724                 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]),
2725         OFFLOAD(IPV6_HOPLIMIT, 8,  U8_MAX, ip6.hop_limit, 0, ttl_hoplimit),
2726
2727         OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source,  0, tcp_sport),
2728         OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest,    0, tcp_dport),
2729         /* in linux iphdr tcp_flags is 8 bits long */
2730         OFFLOAD(TCP_FLAGS,  8,  U8_MAX, tcp.ack_seq, 5, tcp_flags),
2731
2732         OFFLOAD(UDP_SPORT, 16, U16_MAX, udp.source, 0, udp_sport),
2733         OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest,   0, udp_dport),
2734 };
2735
2736 static int offload_pedit_fields(struct mlx5e_priv *priv,
2737                                 int namespace,
2738                                 struct pedit_headers_action *hdrs,
2739                                 struct mlx5e_tc_flow_parse_attr *parse_attr,
2740                                 u32 *action_flags,
2741                                 struct netlink_ext_ack *extack)
2742 {
2743         struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
2744         int i, action_size, first, last, next_z;
2745         void *headers_c, *headers_v, *action, *vals_p;
2746         u32 *s_masks_p, *a_masks_p, s_mask, a_mask;
2747         struct mlx5e_tc_mod_hdr_acts *mod_acts;
2748         struct mlx5_fields *f;
2749         unsigned long mask;
2750         __be32 mask_be32;
2751         __be16 mask_be16;
2752         int err;
2753         u8 cmd;
2754
2755         mod_acts = &parse_attr->mod_hdr_acts;
2756         headers_c = get_match_headers_criteria(*action_flags, &parse_attr->spec);
2757         headers_v = get_match_headers_value(*action_flags, &parse_attr->spec);
2758
2759         set_masks = &hdrs[0].masks;
2760         add_masks = &hdrs[1].masks;
2761         set_vals = &hdrs[0].vals;
2762         add_vals = &hdrs[1].vals;
2763
2764         action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto);
2765
2766         for (i = 0; i < ARRAY_SIZE(fields); i++) {
2767                 bool skip;
2768
2769                 f = &fields[i];
2770                 /* avoid seeing bits set from previous iterations */
2771                 s_mask = 0;
2772                 a_mask = 0;
2773
2774                 s_masks_p = (void *)set_masks + f->offset;
2775                 a_masks_p = (void *)add_masks + f->offset;
2776
2777                 s_mask = *s_masks_p & f->field_mask;
2778                 a_mask = *a_masks_p & f->field_mask;
2779
2780                 if (!s_mask && !a_mask) /* nothing to offload here */
2781                         continue;
2782
2783                 if (s_mask && a_mask) {
2784                         NL_SET_ERR_MSG_MOD(extack,
2785                                            "can't set and add to the same HW field");
2786                         printk(KERN_WARNING "mlx5: can't set and add to the same HW field (%x)\n", f->field);
2787                         return -EOPNOTSUPP;
2788                 }
2789
2790                 skip = false;
2791                 if (s_mask) {
2792                         void *match_mask = headers_c + f->match_offset;
2793                         void *match_val = headers_v + f->match_offset;
2794
2795                         cmd  = MLX5_ACTION_TYPE_SET;
2796                         mask = s_mask;
2797                         vals_p = (void *)set_vals + f->offset;
2798                         /* don't rewrite if we have a match on the same value */
2799                         if (cmp_val_mask(vals_p, s_masks_p, match_val,
2800                                          match_mask, f->field_bsize))
2801                                 skip = true;
2802                         /* clear to denote we consumed this field */
2803                         *s_masks_p &= ~f->field_mask;
2804                 } else {
2805                         cmd  = MLX5_ACTION_TYPE_ADD;
2806                         mask = a_mask;
2807                         vals_p = (void *)add_vals + f->offset;
2808                         /* add 0 is no change */
2809                         if ((*(u32 *)vals_p & f->field_mask) == 0)
2810                                 skip = true;
2811                         /* clear to denote we consumed this field */
2812                         *a_masks_p &= ~f->field_mask;
2813                 }
2814                 if (skip)
2815                         continue;
2816
2817                 if (f->field_bsize == 32) {
2818                         mask_be32 = (__be32)mask;
2819                         mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
2820                 } else if (f->field_bsize == 16) {
2821                         mask_be32 = (__be32)mask;
2822                         mask_be16 = *(__be16 *)&mask_be32;
2823                         mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
2824                 }
2825
2826                 first = find_first_bit(&mask, f->field_bsize);
2827                 next_z = find_next_zero_bit(&mask, f->field_bsize, first);
2828                 last  = find_last_bit(&mask, f->field_bsize);
2829                 if (first < next_z && next_z < last) {
2830                         NL_SET_ERR_MSG_MOD(extack,
2831                                            "rewrite of few sub-fields isn't supported");
2832                         printk(KERN_WARNING "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n",
2833                                mask);
2834                         return -EOPNOTSUPP;
2835                 }
2836
2837                 err = alloc_mod_hdr_actions(priv->mdev, namespace, mod_acts);
2838                 if (err) {
2839                         NL_SET_ERR_MSG_MOD(extack,
2840                                            "too many pedit actions, can't offload");
2841                         mlx5_core_warn(priv->mdev,
2842                                        "mlx5: parsed %d pedit actions, can't do more\n",
2843                                        mod_acts->num_actions);
2844                         return err;
2845                 }
2846
2847                 action = mod_acts->actions +
2848                          (mod_acts->num_actions * action_size);
2849                 MLX5_SET(set_action_in, action, action_type, cmd);
2850                 MLX5_SET(set_action_in, action, field, f->field);
2851
2852                 if (cmd == MLX5_ACTION_TYPE_SET) {
2853                         int start;
2854
2855                         /* if field is bit sized it can start not from first bit */
2856                         start = find_first_bit((unsigned long *)&f->field_mask,
2857                                                f->field_bsize);
2858
2859                         MLX5_SET(set_action_in, action, offset, first - start);
2860                         /* length is num of bits to be written, zero means length of 32 */
2861                         MLX5_SET(set_action_in, action, length, (last - first + 1));
2862                 }
2863
2864                 if (f->field_bsize == 32)
2865                         MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first);
2866                 else if (f->field_bsize == 16)
2867                         MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first);
2868                 else if (f->field_bsize == 8)
2869                         MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first);
2870
2871                 ++mod_acts->num_actions;
2872         }
2873
2874         return 0;
2875 }
2876
2877 static int mlx5e_flow_namespace_max_modify_action(struct mlx5_core_dev *mdev,
2878                                                   int namespace)
2879 {
2880         if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */
2881                 return MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, max_modify_header_actions);
2882         else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */
2883                 return MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_modify_header_actions);
2884 }
2885
2886 int alloc_mod_hdr_actions(struct mlx5_core_dev *mdev,
2887                           int namespace,
2888                           struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
2889 {
2890         int action_size, new_num_actions, max_hw_actions;
2891         size_t new_sz, old_sz;
2892         void *ret;
2893
2894         if (mod_hdr_acts->num_actions < mod_hdr_acts->max_actions)
2895                 return 0;
2896
2897         action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto);
2898
2899         max_hw_actions = mlx5e_flow_namespace_max_modify_action(mdev,
2900                                                                 namespace);
2901         new_num_actions = min(max_hw_actions,
2902                               mod_hdr_acts->actions ?
2903                               mod_hdr_acts->max_actions * 2 : 1);
2904         if (mod_hdr_acts->max_actions == new_num_actions)
2905                 return -ENOSPC;
2906
2907         new_sz = action_size * new_num_actions;
2908         old_sz = mod_hdr_acts->max_actions * action_size;
2909         ret = krealloc(mod_hdr_acts->actions, new_sz, GFP_KERNEL);
2910         if (!ret)
2911                 return -ENOMEM;
2912
2913         memset(ret + old_sz, 0, new_sz - old_sz);
2914         mod_hdr_acts->actions = ret;
2915         mod_hdr_acts->max_actions = new_num_actions;
2916
2917         return 0;
2918 }
2919
2920 void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
2921 {
2922         kfree(mod_hdr_acts->actions);
2923         mod_hdr_acts->actions = NULL;
2924         mod_hdr_acts->num_actions = 0;
2925         mod_hdr_acts->max_actions = 0;
2926 }
2927
2928 static const struct pedit_headers zero_masks = {};
2929
2930 static int
2931 parse_pedit_to_modify_hdr(struct mlx5e_priv *priv,
2932                           const struct flow_action_entry *act, int namespace,
2933                           struct mlx5e_tc_flow_parse_attr *parse_attr,
2934                           struct pedit_headers_action *hdrs,
2935                           struct netlink_ext_ack *extack)
2936 {
2937         u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1;
2938         int err = -EOPNOTSUPP;
2939         u32 mask, val, offset;
2940         u8 htype;
2941
2942         htype = act->mangle.htype;
2943         err = -EOPNOTSUPP; /* can't be all optimistic */
2944
2945         if (htype == FLOW_ACT_MANGLE_UNSPEC) {
2946                 NL_SET_ERR_MSG_MOD(extack, "legacy pedit isn't offloaded");
2947                 goto out_err;
2948         }
2949
2950         if (!mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace)) {
2951                 NL_SET_ERR_MSG_MOD(extack,
2952                                    "The pedit offload action is not supported");
2953                 goto out_err;
2954         }
2955
2956         mask = act->mangle.mask;
2957         val = act->mangle.val;
2958         offset = act->mangle.offset;
2959
2960         err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd]);
2961         if (err)
2962                 goto out_err;
2963
2964         hdrs[cmd].pedits++;
2965
2966         return 0;
2967 out_err:
2968         return err;
2969 }
2970
2971 static int
2972 parse_pedit_to_reformat(struct mlx5e_priv *priv,
2973                         const struct flow_action_entry *act,
2974                         struct mlx5e_tc_flow_parse_attr *parse_attr,
2975                         struct netlink_ext_ack *extack)
2976 {
2977         u32 mask, val, offset;
2978         u32 *p;
2979
2980         if (act->id != FLOW_ACTION_MANGLE)
2981                 return -EOPNOTSUPP;
2982
2983         if (act->mangle.htype != FLOW_ACT_MANGLE_HDR_TYPE_ETH) {
2984                 NL_SET_ERR_MSG_MOD(extack, "Only Ethernet modification is supported");
2985                 return -EOPNOTSUPP;
2986         }
2987
2988         mask = ~act->mangle.mask;
2989         val = act->mangle.val;
2990         offset = act->mangle.offset;
2991         p = (u32 *)&parse_attr->eth;
2992         *(p + (offset >> 2)) |= (val & mask);
2993
2994         return 0;
2995 }
2996
2997 static int parse_tc_pedit_action(struct mlx5e_priv *priv,
2998                                  const struct flow_action_entry *act, int namespace,
2999                                  struct mlx5e_tc_flow_parse_attr *parse_attr,
3000                                  struct pedit_headers_action *hdrs,
3001                                  struct mlx5e_tc_flow *flow,
3002                                  struct netlink_ext_ack *extack)
3003 {
3004         if (flow && flow_flag_test(flow, L3_TO_L2_DECAP))
3005                 return parse_pedit_to_reformat(priv, act, parse_attr, extack);
3006
3007         return parse_pedit_to_modify_hdr(priv, act, namespace,
3008                                          parse_attr, hdrs, extack);
3009 }
3010
3011 static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
3012                                  struct mlx5e_tc_flow_parse_attr *parse_attr,
3013                                  struct pedit_headers_action *hdrs,
3014                                  u32 *action_flags,
3015                                  struct netlink_ext_ack *extack)
3016 {
3017         struct pedit_headers *cmd_masks;
3018         int err;
3019         u8 cmd;
3020
3021         err = offload_pedit_fields(priv, namespace, hdrs, parse_attr,
3022                                    action_flags, extack);
3023         if (err < 0)
3024                 goto out_dealloc_parsed_actions;
3025
3026         for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
3027                 cmd_masks = &hdrs[cmd].masks;
3028                 if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
3029                         NL_SET_ERR_MSG_MOD(extack,
3030                                            "attempt to offload an unsupported field");
3031                         netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd);
3032                         print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
3033                                        16, 1, cmd_masks, sizeof(zero_masks), true);
3034                         err = -EOPNOTSUPP;
3035                         goto out_dealloc_parsed_actions;
3036                 }
3037         }
3038
3039         return 0;
3040
3041 out_dealloc_parsed_actions:
3042         dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
3043         return err;
3044 }
3045
3046 static bool csum_offload_supported(struct mlx5e_priv *priv,
3047                                    u32 action,
3048                                    u32 update_flags,
3049                                    struct netlink_ext_ack *extack)
3050 {
3051         u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP |
3052                          TCA_CSUM_UPDATE_FLAG_UDP;
3053
3054         /*  The HW recalcs checksums only if re-writing headers */
3055         if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) {
3056                 NL_SET_ERR_MSG_MOD(extack,
3057                                    "TC csum action is only offloaded with pedit");
3058                 netdev_warn(priv->netdev,
3059                             "TC csum action is only offloaded with pedit\n");
3060                 return false;
3061         }
3062
3063         if (update_flags & ~prot_flags) {
3064                 NL_SET_ERR_MSG_MOD(extack,
3065                                    "can't offload TC csum action for some header/s");
3066                 netdev_warn(priv->netdev,
3067                             "can't offload TC csum action for some header/s - flags %#x\n",
3068                             update_flags);
3069                 return false;
3070         }
3071
3072         return true;
3073 }
3074
3075 struct ip_ttl_word {
3076         __u8    ttl;
3077         __u8    protocol;
3078         __sum16 check;
3079 };
3080
3081 struct ipv6_hoplimit_word {
3082         __be16  payload_len;
3083         __u8    nexthdr;
3084         __u8    hop_limit;
3085 };
3086
3087 static int is_action_keys_supported(const struct flow_action_entry *act,
3088                                     bool ct_flow, bool *modify_ip_header,
3089                                     struct netlink_ext_ack *extack)
3090 {
3091         u32 mask, offset;
3092         u8 htype;
3093
3094         htype = act->mangle.htype;
3095         offset = act->mangle.offset;
3096         mask = ~act->mangle.mask;
3097         /* For IPv4 & IPv6 header check 4 byte word,
3098          * to determine that modified fields
3099          * are NOT ttl & hop_limit only.
3100          */
3101         if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) {
3102                 struct ip_ttl_word *ttl_word =
3103                         (struct ip_ttl_word *)&mask;
3104
3105                 if (offset != offsetof(struct iphdr, ttl) ||
3106                     ttl_word->protocol ||
3107                     ttl_word->check) {
3108                         *modify_ip_header = true;
3109                 }
3110
3111                 if (ct_flow && offset >= offsetof(struct iphdr, saddr)) {
3112                         NL_SET_ERR_MSG_MOD(extack,
3113                                            "can't offload re-write of ipv4 address with action ct");
3114                         return -EOPNOTSUPP;
3115                 }
3116         } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) {
3117                 struct ipv6_hoplimit_word *hoplimit_word =
3118                         (struct ipv6_hoplimit_word *)&mask;
3119
3120                 if (offset != offsetof(struct ipv6hdr, payload_len) ||
3121                     hoplimit_word->payload_len ||
3122                     hoplimit_word->nexthdr) {
3123                         *modify_ip_header = true;
3124                 }
3125
3126                 if (ct_flow && offset >= offsetof(struct ipv6hdr, saddr)) {
3127                         NL_SET_ERR_MSG_MOD(extack,
3128                                            "can't offload re-write of ipv6 address with action ct");
3129                         return -EOPNOTSUPP;
3130                 }
3131         } else if (ct_flow && (htype == FLOW_ACT_MANGLE_HDR_TYPE_TCP ||
3132                                htype == FLOW_ACT_MANGLE_HDR_TYPE_UDP)) {
3133                 NL_SET_ERR_MSG_MOD(extack,
3134                                    "can't offload re-write of transport header ports with action ct");
3135                 return -EOPNOTSUPP;
3136         }
3137
3138         return 0;
3139 }
3140
3141 static bool modify_header_match_supported(struct mlx5_flow_spec *spec,
3142                                           struct flow_action *flow_action,
3143                                           u32 actions, bool ct_flow,
3144                                           struct netlink_ext_ack *extack)
3145 {
3146         const struct flow_action_entry *act;
3147         bool modify_ip_header;
3148         void *headers_c;
3149         void *headers_v;
3150         u16 ethertype;
3151         u8 ip_proto;
3152         int i, err;
3153
3154         headers_c = get_match_headers_criteria(actions, spec);
3155         headers_v = get_match_headers_value(actions, spec);
3156         ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
3157
3158         /* for non-IP we only re-write MACs, so we're okay */
3159         if (MLX5_GET(fte_match_set_lyr_2_4, headers_c, ip_version) == 0 &&
3160             ethertype != ETH_P_IP && ethertype != ETH_P_IPV6)
3161                 goto out_ok;
3162
3163         modify_ip_header = false;
3164         flow_action_for_each(i, act, flow_action) {
3165                 if (act->id != FLOW_ACTION_MANGLE &&
3166                     act->id != FLOW_ACTION_ADD)
3167                         continue;
3168
3169                 err = is_action_keys_supported(act, ct_flow,
3170                                                &modify_ip_header, extack);
3171                 if (err)
3172                         return err;
3173         }
3174
3175         ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
3176         if (modify_ip_header && ip_proto != IPPROTO_TCP &&
3177             ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) {
3178                 NL_SET_ERR_MSG_MOD(extack,
3179                                    "can't offload re-write of non TCP/UDP");
3180                 pr_info("can't offload re-write of ip proto %d\n", ip_proto);
3181                 return false;
3182         }
3183
3184 out_ok:
3185         return true;
3186 }
3187
3188 static bool actions_match_supported(struct mlx5e_priv *priv,
3189                                     struct flow_action *flow_action,
3190                                     struct mlx5e_tc_flow_parse_attr *parse_attr,
3191                                     struct mlx5e_tc_flow *flow,
3192                                     struct netlink_ext_ack *extack)
3193 {
3194         bool ct_flow;
3195         u32 actions;
3196
3197         ct_flow = flow_flag_test(flow, CT);
3198         if (mlx5e_is_eswitch_flow(flow)) {
3199                 actions = flow->esw_attr->action;
3200
3201                 if (flow->esw_attr->split_count && ct_flow) {
3202                         /* All registers used by ct are cleared when using
3203                          * split rules.
3204                          */
3205                         NL_SET_ERR_MSG_MOD(extack,
3206                                            "Can't offload mirroring with action ct");
3207                         return false;
3208                 }
3209         } else {
3210                 actions = flow->nic_attr->action;
3211         }
3212
3213         if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
3214                 return modify_header_match_supported(&parse_attr->spec,
3215                                                      flow_action, actions,
3216                                                      ct_flow, extack);
3217
3218         return true;
3219 }
3220
3221 static bool same_port_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
3222 {
3223         return priv->mdev == peer_priv->mdev;
3224 }
3225
3226 static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
3227 {
3228         struct mlx5_core_dev *fmdev, *pmdev;
3229         u64 fsystem_guid, psystem_guid;
3230
3231         fmdev = priv->mdev;
3232         pmdev = peer_priv->mdev;
3233
3234         fsystem_guid = mlx5_query_nic_system_image_guid(fmdev);
3235         psystem_guid = mlx5_query_nic_system_image_guid(pmdev);
3236
3237         return (fsystem_guid == psystem_guid);
3238 }
3239
3240 static int add_vlan_rewrite_action(struct mlx5e_priv *priv, int namespace,
3241                                    const struct flow_action_entry *act,
3242                                    struct mlx5e_tc_flow_parse_attr *parse_attr,
3243                                    struct pedit_headers_action *hdrs,
3244                                    u32 *action, struct netlink_ext_ack *extack)
3245 {
3246         u16 mask16 = VLAN_VID_MASK;
3247         u16 val16 = act->vlan.vid & VLAN_VID_MASK;
3248         const struct flow_action_entry pedit_act = {
3249                 .id = FLOW_ACTION_MANGLE,
3250                 .mangle.htype = FLOW_ACT_MANGLE_HDR_TYPE_ETH,
3251                 .mangle.offset = offsetof(struct vlan_ethhdr, h_vlan_TCI),
3252                 .mangle.mask = ~(u32)be16_to_cpu(*(__be16 *)&mask16),
3253                 .mangle.val = (u32)be16_to_cpu(*(__be16 *)&val16),
3254         };
3255         u8 match_prio_mask, match_prio_val;
3256         void *headers_c, *headers_v;
3257         int err;
3258
3259         headers_c = get_match_headers_criteria(*action, &parse_attr->spec);
3260         headers_v = get_match_headers_value(*action, &parse_attr->spec);
3261
3262         if (!(MLX5_GET(fte_match_set_lyr_2_4, headers_c, cvlan_tag) &&
3263               MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag))) {
3264                 NL_SET_ERR_MSG_MOD(extack,
3265                                    "VLAN rewrite action must have VLAN protocol match");
3266                 return -EOPNOTSUPP;
3267         }
3268
3269         match_prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
3270         match_prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
3271         if (act->vlan.prio != (match_prio_val & match_prio_mask)) {
3272                 NL_SET_ERR_MSG_MOD(extack,
3273                                    "Changing VLAN prio is not supported");
3274                 return -EOPNOTSUPP;
3275         }
3276
3277         err = parse_tc_pedit_action(priv, &pedit_act, namespace, parse_attr, hdrs, NULL, extack);
3278         *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3279
3280         return err;
3281 }
3282
3283 static int
3284 add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv,
3285                                  struct mlx5e_tc_flow_parse_attr *parse_attr,
3286                                  struct pedit_headers_action *hdrs,
3287                                  u32 *action, struct netlink_ext_ack *extack)
3288 {
3289         const struct flow_action_entry prio_tag_act = {
3290                 .vlan.vid = 0,
3291                 .vlan.prio =
3292                         MLX5_GET(fte_match_set_lyr_2_4,
3293                                  get_match_headers_value(*action,
3294                                                          &parse_attr->spec),
3295                                  first_prio) &
3296                         MLX5_GET(fte_match_set_lyr_2_4,
3297                                  get_match_headers_criteria(*action,
3298                                                             &parse_attr->spec),
3299                                  first_prio),
3300         };
3301
3302         return add_vlan_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB,
3303                                        &prio_tag_act, parse_attr, hdrs, action,
3304                                        extack);
3305 }
3306
3307 static int parse_tc_nic_actions(struct mlx5e_priv *priv,
3308                                 struct flow_action *flow_action,
3309                                 struct mlx5e_tc_flow_parse_attr *parse_attr,
3310                                 struct mlx5e_tc_flow *flow,
3311                                 struct netlink_ext_ack *extack)
3312 {
3313         struct mlx5_nic_flow_attr *attr = flow->nic_attr;
3314         struct pedit_headers_action hdrs[2] = {};
3315         const struct flow_action_entry *act;
3316         u32 action = 0;
3317         int err, i;
3318
3319         if (!flow_action_has_entries(flow_action))
3320                 return -EINVAL;
3321
3322         if (!flow_action_hw_stats_check(flow_action, extack,
3323                                         FLOW_ACTION_HW_STATS_DELAYED_BIT))
3324                 return -EOPNOTSUPP;
3325
3326         attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
3327
3328         flow_action_for_each(i, act, flow_action) {
3329                 switch (act->id) {
3330                 case FLOW_ACTION_ACCEPT:
3331                         action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
3332                                   MLX5_FLOW_CONTEXT_ACTION_COUNT;
3333                         break;
3334                 case FLOW_ACTION_DROP:
3335                         action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
3336                         if (MLX5_CAP_FLOWTABLE(priv->mdev,
3337                                                flow_table_properties_nic_receive.flow_counter))
3338                                 action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
3339                         break;
3340                 case FLOW_ACTION_MANGLE:
3341                 case FLOW_ACTION_ADD:
3342                         err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_KERNEL,
3343                                                     parse_attr, hdrs, NULL, extack);
3344                         if (err)
3345                                 return err;
3346
3347                         action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
3348                                   MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3349                         break;
3350                 case FLOW_ACTION_VLAN_MANGLE:
3351                         err = add_vlan_rewrite_action(priv,
3352                                                       MLX5_FLOW_NAMESPACE_KERNEL,
3353                                                       act, parse_attr, hdrs,
3354                                                       &action, extack);
3355                         if (err)
3356                                 return err;
3357
3358                         break;
3359                 case FLOW_ACTION_CSUM:
3360                         if (csum_offload_supported(priv, action,
3361                                                    act->csum_flags,
3362                                                    extack))
3363                                 break;
3364
3365                         return -EOPNOTSUPP;
3366                 case FLOW_ACTION_REDIRECT: {
3367                         struct net_device *peer_dev = act->dev;
3368
3369                         if (priv->netdev->netdev_ops == peer_dev->netdev_ops &&
3370                             same_hw_devs(priv, netdev_priv(peer_dev))) {
3371                                 parse_attr->mirred_ifindex[0] = peer_dev->ifindex;
3372                                 flow_flag_set(flow, HAIRPIN);
3373                                 action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
3374                                           MLX5_FLOW_CONTEXT_ACTION_COUNT;
3375                         } else {
3376                                 NL_SET_ERR_MSG_MOD(extack,
3377                                                    "device is not on same HW, can't offload");
3378                                 netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n",
3379                                             peer_dev->name);
3380                                 return -EINVAL;
3381                         }
3382                         }
3383                         break;
3384                 case FLOW_ACTION_MARK: {
3385                         u32 mark = act->mark;
3386
3387                         if (mark & ~MLX5E_TC_FLOW_ID_MASK) {
3388                                 NL_SET_ERR_MSG_MOD(extack,
3389                                                    "Bad flow mark - only 16 bit is supported");
3390                                 return -EINVAL;
3391                         }
3392
3393                         attr->flow_tag = mark;
3394                         action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3395                         }
3396                         break;
3397                 default:
3398                         NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
3399                         return -EOPNOTSUPP;
3400                 }
3401         }
3402
3403         if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits ||
3404             hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) {
3405                 err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_KERNEL,
3406                                             parse_attr, hdrs, &action, extack);
3407                 if (err)
3408                         return err;
3409                 /* in case all pedit actions are skipped, remove the MOD_HDR
3410                  * flag.
3411                  */
3412                 if (parse_attr->mod_hdr_acts.num_actions == 0) {
3413                         action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3414                         dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
3415                 }
3416         }
3417
3418         attr->action = action;
3419         if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack))
3420                 return -EOPNOTSUPP;
3421
3422         return 0;
3423 }
3424
3425 struct encap_key {
3426         const struct ip_tunnel_key *ip_tun_key;
3427         struct mlx5e_tc_tunnel *tc_tunnel;
3428 };
3429
3430 static inline int cmp_encap_info(struct encap_key *a,
3431                                  struct encap_key *b)
3432 {
3433         return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) ||
3434                a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type;
3435 }
3436
3437 static inline int cmp_decap_info(struct mlx5e_decap_key *a,
3438                                  struct mlx5e_decap_key *b)
3439 {
3440         return memcmp(&a->key, &b->key, sizeof(b->key));
3441 }
3442
3443 static inline int hash_encap_info(struct encap_key *key)
3444 {
3445         return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
3446                      key->tc_tunnel->tunnel_type);
3447 }
3448
3449 static inline int hash_decap_info(struct mlx5e_decap_key *key)
3450 {
3451         return jhash(&key->key, sizeof(key->key), 0);
3452 }
3453
3454 static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv,
3455                                   struct net_device *peer_netdev)
3456 {
3457         struct mlx5e_priv *peer_priv;
3458
3459         peer_priv = netdev_priv(peer_netdev);
3460
3461         return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) &&
3462                 mlx5e_eswitch_vf_rep(priv->netdev) &&
3463                 mlx5e_eswitch_vf_rep(peer_netdev) &&
3464                 same_hw_devs(priv, peer_priv));
3465 }
3466
3467 bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
3468 {
3469         return refcount_inc_not_zero(&e->refcnt);
3470 }
3471
3472 static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
3473 {
3474         return refcount_inc_not_zero(&e->refcnt);
3475 }
3476
3477 static struct mlx5e_encap_entry *
3478 mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key,
3479                 uintptr_t hash_key)
3480 {
3481         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3482         struct mlx5e_encap_entry *e;
3483         struct encap_key e_key;
3484
3485         hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
3486                                    encap_hlist, hash_key) {
3487                 e_key.ip_tun_key = &e->tun_info->key;
3488                 e_key.tc_tunnel = e->tunnel;
3489                 if (!cmp_encap_info(&e_key, key) &&
3490                     mlx5e_encap_take(e))
3491                         return e;
3492         }
3493
3494         return NULL;
3495 }
3496
3497 static struct mlx5e_decap_entry *
3498 mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
3499                 uintptr_t hash_key)
3500 {
3501         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3502         struct mlx5e_decap_key r_key;
3503         struct mlx5e_decap_entry *e;
3504
3505         hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
3506                                    hlist, hash_key) {
3507                 r_key = e->key;
3508                 if (!cmp_decap_info(&r_key, key) &&
3509                     mlx5e_decap_take(e))
3510                         return e;
3511         }
3512         return NULL;
3513 }
3514
3515 static struct ip_tunnel_info *dup_tun_info(const struct ip_tunnel_info *tun_info)
3516 {
3517         size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
3518
3519         return kmemdup(tun_info, tun_size, GFP_KERNEL);
3520 }
3521
3522 static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
3523                                       struct mlx5e_tc_flow *flow,
3524                                       int out_index,
3525                                       struct mlx5e_encap_entry *e,
3526                                       struct netlink_ext_ack *extack)
3527 {
3528         int i;
3529
3530         for (i = 0; i < out_index; i++) {
3531                 if (flow->encaps[i].e != e)
3532                         continue;
3533                 NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
3534                 netdev_err(priv->netdev, "can't duplicate encap action\n");
3535                 return true;
3536         }
3537
3538         return false;
3539 }
3540
3541 static int mlx5e_attach_encap(struct mlx5e_priv *priv,
3542                               struct mlx5e_tc_flow *flow,
3543                               struct net_device *mirred_dev,
3544                               int out_index,
3545                               struct netlink_ext_ack *extack,
3546                               struct net_device **encap_dev,
3547                               bool *encap_valid)
3548 {
3549         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3550         struct mlx5_esw_flow_attr *attr = flow->esw_attr;
3551         struct mlx5e_tc_flow_parse_attr *parse_attr;
3552         const struct ip_tunnel_info *tun_info;
3553         struct encap_key key;
3554         struct mlx5e_encap_entry *e;
3555         unsigned short family;
3556         uintptr_t hash_key;
3557         int err = 0;
3558
3559         parse_attr = attr->parse_attr;
3560         tun_info = parse_attr->tun_info[out_index];
3561         family = ip_tunnel_info_af(tun_info);
3562         key.ip_tun_key = &tun_info->key;
3563         key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
3564         if (!key.tc_tunnel) {
3565                 NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
3566                 return -EOPNOTSUPP;
3567         }
3568
3569         hash_key = hash_encap_info(&key);
3570
3571         mutex_lock(&esw->offloads.encap_tbl_lock);
3572         e = mlx5e_encap_get(priv, &key, hash_key);
3573
3574         /* must verify if encap is valid or not */
3575         if (e) {
3576                 /* Check that entry was not already attached to this flow */
3577                 if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
3578                         err = -EOPNOTSUPP;
3579                         goto out_err;
3580                 }
3581
3582                 mutex_unlock(&esw->offloads.encap_tbl_lock);
3583                 wait_for_completion(&e->res_ready);
3584
3585                 /* Protect against concurrent neigh update. */
3586                 mutex_lock(&esw->offloads.encap_tbl_lock);
3587                 if (e->compl_result < 0) {
3588                         err = -EREMOTEIO;
3589                         goto out_err;
3590                 }
3591                 goto attach_flow;
3592         }
3593
3594         e = kzalloc(sizeof(*e), GFP_KERNEL);
3595         if (!e) {
3596                 err = -ENOMEM;
3597                 goto out_err;
3598         }
3599
3600         refcount_set(&e->refcnt, 1);
3601         init_completion(&e->res_ready);
3602
3603         tun_info = dup_tun_info(tun_info);
3604         if (!tun_info) {
3605                 err = -ENOMEM;
3606                 goto out_err_init;
3607         }
3608         e->tun_info = tun_info;
3609         err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
3610         if (err)
3611                 goto out_err_init;
3612
3613         INIT_LIST_HEAD(&e->flows);
3614         hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
3615         mutex_unlock(&esw->offloads.encap_tbl_lock);
3616
3617         if (family == AF_INET)
3618                 err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
3619         else if (family == AF_INET6)
3620                 err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
3621
3622         /* Protect against concurrent neigh update. */
3623         mutex_lock(&esw->offloads.encap_tbl_lock);
3624         complete_all(&e->res_ready);
3625         if (err) {
3626                 e->compl_result = err;
3627                 goto out_err;
3628         }
3629         e->compl_result = 1;
3630
3631 attach_flow:
3632         flow->encaps[out_index].e = e;
3633         list_add(&flow->encaps[out_index].list, &e->flows);
3634         flow->encaps[out_index].index = out_index;
3635         *encap_dev = e->out_dev;
3636         if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
3637                 attr->dests[out_index].pkt_reformat = e->pkt_reformat;
3638                 attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
3639                 *encap_valid = true;
3640         } else {
3641                 *encap_valid = false;
3642         }
3643         mutex_unlock(&esw->offloads.encap_tbl_lock);
3644
3645         return err;
3646
3647 out_err:
3648         mutex_unlock(&esw->offloads.encap_tbl_lock);
3649         if (e)
3650                 mlx5e_encap_put(priv, e);
3651         return err;
3652
3653 out_err_init:
3654         mutex_unlock(&esw->offloads.encap_tbl_lock);
3655         kfree(tun_info);
3656         kfree(e);
3657         return err;
3658 }
3659
3660 static int mlx5e_attach_decap(struct mlx5e_priv *priv,
3661                               struct mlx5e_tc_flow *flow,
3662                               struct netlink_ext_ack *extack)
3663 {
3664         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3665         struct mlx5_esw_flow_attr *attr = flow->esw_attr;
3666         struct mlx5e_tc_flow_parse_attr *parse_attr;
3667         struct mlx5e_decap_entry *d;
3668         struct mlx5e_decap_key key;
3669         uintptr_t hash_key;
3670         int err = 0;
3671
3672         parse_attr = attr->parse_attr;
3673         if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
3674                 NL_SET_ERR_MSG_MOD(extack,
3675                                    "encap header larger than max supported");
3676                 return -EOPNOTSUPP;
3677         }
3678
3679         key.key = parse_attr->eth;
3680         hash_key = hash_decap_info(&key);
3681         mutex_lock(&esw->offloads.decap_tbl_lock);
3682         d = mlx5e_decap_get(priv, &key, hash_key);
3683         if (d) {
3684                 mutex_unlock(&esw->offloads.decap_tbl_lock);
3685                 wait_for_completion(&d->res_ready);
3686                 mutex_lock(&esw->offloads.decap_tbl_lock);
3687                 if (d->compl_result) {
3688                         err = -EREMOTEIO;
3689                         goto out_free;
3690                 }
3691                 goto found;
3692         }
3693
3694         d = kzalloc(sizeof(*d), GFP_KERNEL);
3695         if (!d) {
3696                 err = -ENOMEM;
3697                 goto out_err;
3698         }
3699
3700         d->key = key;
3701         refcount_set(&d->refcnt, 1);
3702         init_completion(&d->res_ready);
3703         INIT_LIST_HEAD(&d->flows);
3704         hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
3705         mutex_unlock(&esw->offloads.decap_tbl_lock);
3706
3707         d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
3708                                                      MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2,
3709                                                      sizeof(parse_attr->eth),
3710                                                      &parse_attr->eth,
3711                                                      MLX5_FLOW_NAMESPACE_FDB);
3712         if (IS_ERR(d->pkt_reformat)) {
3713                 err = PTR_ERR(d->pkt_reformat);
3714                 d->compl_result = err;
3715         }
3716         mutex_lock(&esw->offloads.decap_tbl_lock);
3717         complete_all(&d->res_ready);
3718         if (err)
3719                 goto out_free;
3720
3721 found:
3722         flow->decap_reformat = d;
3723         attr->decap_pkt_reformat = d->pkt_reformat;
3724         list_add(&flow->l3_to_l2_reformat, &d->flows);
3725         mutex_unlock(&esw->offloads.decap_tbl_lock);
3726         return 0;
3727
3728 out_free:
3729         mutex_unlock(&esw->offloads.decap_tbl_lock);
3730         mlx5e_decap_put(priv, d);
3731         return err;
3732
3733 out_err:
3734         mutex_unlock(&esw->offloads.decap_tbl_lock);
3735         return err;
3736 }
3737
3738 static int parse_tc_vlan_action(struct mlx5e_priv *priv,
3739                                 const struct flow_action_entry *act,
3740                                 struct mlx5_esw_flow_attr *attr,
3741                                 u32 *action)
3742 {
3743         u8 vlan_idx = attr->total_vlan;
3744
3745         if (vlan_idx >= MLX5_FS_VLAN_DEPTH)
3746                 return -EOPNOTSUPP;
3747
3748         switch (act->id) {
3749         case FLOW_ACTION_VLAN_POP:
3750                 if (vlan_idx) {
3751                         if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
3752                                                                  MLX5_FS_VLAN_DEPTH))
3753                                 return -EOPNOTSUPP;
3754
3755                         *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2;
3756                 } else {
3757                         *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
3758                 }
3759                 break;
3760         case FLOW_ACTION_VLAN_PUSH:
3761                 attr->vlan_vid[vlan_idx] = act->vlan.vid;
3762                 attr->vlan_prio[vlan_idx] = act->vlan.prio;
3763                 attr->vlan_proto[vlan_idx] = act->vlan.proto;
3764                 if (!attr->vlan_proto[vlan_idx])
3765                         attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q);
3766
3767                 if (vlan_idx) {
3768                         if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
3769                                                                  MLX5_FS_VLAN_DEPTH))
3770                                 return -EOPNOTSUPP;
3771
3772                         *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
3773                 } else {
3774                         if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) &&
3775                             (act->vlan.proto != htons(ETH_P_8021Q) ||
3776                              act->vlan.prio))
3777                                 return -EOPNOTSUPP;
3778
3779                         *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
3780                 }
3781                 break;
3782         default:
3783                 return -EINVAL;
3784         }
3785
3786         attr->total_vlan = vlan_idx + 1;
3787
3788         return 0;
3789 }
3790
3791 static struct net_device *get_fdb_out_dev(struct net_device *uplink_dev,
3792                                           struct net_device *out_dev)
3793 {
3794         struct net_device *fdb_out_dev = out_dev;
3795         struct net_device *uplink_upper;
3796
3797         rcu_read_lock();
3798         uplink_upper = netdev_master_upper_dev_get_rcu(uplink_dev);
3799         if (uplink_upper && netif_is_lag_master(uplink_upper) &&
3800             uplink_upper == out_dev) {
3801                 fdb_out_dev = uplink_dev;
3802         } else if (netif_is_lag_master(out_dev)) {
3803                 fdb_out_dev = bond_option_active_slave_get_rcu(netdev_priv(out_dev));
3804                 if (fdb_out_dev &&
3805                     (!mlx5e_eswitch_rep(fdb_out_dev) ||
3806                      !netdev_port_same_parent_id(fdb_out_dev, uplink_dev)))
3807                         fdb_out_dev = NULL;
3808         }
3809         rcu_read_unlock();
3810         return fdb_out_dev;
3811 }
3812
3813 static int add_vlan_push_action(struct mlx5e_priv *priv,
3814                                 struct mlx5_esw_flow_attr *attr,
3815                                 struct net_device **out_dev,
3816                                 u32 *action)
3817 {
3818         struct net_device *vlan_dev = *out_dev;
3819         struct flow_action_entry vlan_act = {
3820                 .id = FLOW_ACTION_VLAN_PUSH,
3821                 .vlan.vid = vlan_dev_vlan_id(vlan_dev),
3822                 .vlan.proto = vlan_dev_vlan_proto(vlan_dev),
3823                 .vlan.prio = 0,
3824         };
3825         int err;
3826
3827         err = parse_tc_vlan_action(priv, &vlan_act, attr, action);
3828         if (err)
3829                 return err;
3830
3831         *out_dev = dev_get_by_index_rcu(dev_net(vlan_dev),
3832                                         dev_get_iflink(vlan_dev));
3833         if (is_vlan_dev(*out_dev))
3834                 err = add_vlan_push_action(priv, attr, out_dev, action);
3835
3836         return err;
3837 }
3838
3839 static int add_vlan_pop_action(struct mlx5e_priv *priv,
3840                                struct mlx5_esw_flow_attr *attr,
3841                                u32 *action)
3842 {
3843         struct flow_action_entry vlan_act = {
3844                 .id = FLOW_ACTION_VLAN_POP,
3845         };
3846         int nest_level, err = 0;
3847
3848         nest_level = attr->parse_attr->filter_dev->lower_level -
3849                                                 priv->netdev->lower_level;
3850         while (nest_level--) {
3851                 err = parse_tc_vlan_action(priv, &vlan_act, attr, action);
3852                 if (err)
3853                         return err;
3854         }
3855
3856         return err;
3857 }
3858
3859 static bool same_hw_reps(struct mlx5e_priv *priv,
3860                          struct net_device *peer_netdev)
3861 {
3862         struct mlx5e_priv *peer_priv;
3863
3864         peer_priv = netdev_priv(peer_netdev);
3865
3866         return mlx5e_eswitch_rep(priv->netdev) &&
3867                mlx5e_eswitch_rep(peer_netdev) &&
3868                same_hw_devs(priv, peer_priv);
3869 }
3870
3871 static bool is_lag_dev(struct mlx5e_priv *priv,
3872                        struct net_device *peer_netdev)
3873 {
3874         return ((mlx5_lag_is_sriov(priv->mdev) ||
3875                  mlx5_lag_is_multipath(priv->mdev)) &&
3876                  same_hw_reps(priv, peer_netdev));
3877 }
3878
3879 bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
3880                                     struct net_device *out_dev)
3881 {
3882         if (is_merged_eswitch_vfs(priv, out_dev))
3883                 return true;
3884
3885         if (is_lag_dev(priv, out_dev))
3886                 return true;
3887
3888         return mlx5e_eswitch_rep(out_dev) &&
3889                same_port_devs(priv, netdev_priv(out_dev));
3890 }
3891
3892 static bool is_duplicated_output_device(struct net_device *dev,
3893                                         struct net_device *out_dev,
3894                                         int *ifindexes, int if_count,
3895                                         struct netlink_ext_ack *extack)
3896 {
3897         int i;
3898
3899         for (i = 0; i < if_count; i++) {
3900                 if (ifindexes[i] == out_dev->ifindex) {
3901                         NL_SET_ERR_MSG_MOD(extack,
3902                                            "can't duplicate output to same device");
3903                         netdev_err(dev, "can't duplicate output to same device: %s\n",
3904                                    out_dev->name);
3905                         return true;
3906                 }
3907         }
3908
3909         return false;
3910 }
3911
3912 static int mlx5_validate_goto_chain(struct mlx5_eswitch *esw,
3913                                     struct mlx5e_tc_flow *flow,
3914                                     const struct flow_action_entry *act,
3915                                     u32 actions,
3916                                     struct netlink_ext_ack *extack)
3917 {
3918         u32 max_chain = mlx5_esw_chains_get_chain_range(esw);
3919         struct mlx5_esw_flow_attr *attr = flow->esw_attr;
3920         bool ft_flow = mlx5e_is_ft_flow(flow);
3921         u32 dest_chain = act->chain_index;
3922
3923         if (ft_flow) {
3924                 NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported");
3925                 return -EOPNOTSUPP;
3926         }
3927
3928         if (!mlx5_esw_chains_backwards_supported(esw) &&
3929             dest_chain <= attr->chain) {
3930                 NL_SET_ERR_MSG_MOD(extack,
3931                                    "Goto lower numbered chain isn't supported");
3932                 return -EOPNOTSUPP;
3933         }
3934         if (dest_chain > max_chain) {
3935                 NL_SET_ERR_MSG_MOD(extack,
3936                                    "Requested destination chain is out of supported range");
3937                 return -EOPNOTSUPP;
3938         }
3939
3940         if (actions & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
3941                        MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
3942             !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat_and_fwd_to_table)) {
3943                 NL_SET_ERR_MSG_MOD(extack,
3944                                    "Goto chain is not allowed if action has reformat or decap");
3945                 return -EOPNOTSUPP;
3946         }
3947
3948         return 0;
3949 }
3950
3951 static int verify_uplink_forwarding(struct mlx5e_priv *priv,
3952                                     struct mlx5e_tc_flow *flow,
3953                                     struct net_device *out_dev,
3954                                     struct netlink_ext_ack *extack)
3955 {
3956         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3957         struct mlx5_esw_flow_attr *attr = flow->esw_attr;
3958         struct mlx5e_rep_priv *rep_priv;
3959
3960         /* Forwarding non encapsulated traffic between
3961          * uplink ports is allowed only if
3962          * termination_table_raw_traffic cap is set.
3963          *
3964          * Input vport was stored esw_attr->in_rep.
3965          * In LAG case, *priv* is the private data of
3966          * uplink which may be not the input vport.
3967          */
3968         rep_priv = mlx5e_rep_to_rep_priv(attr->in_rep);
3969
3970         if (!(mlx5e_eswitch_uplink_rep(rep_priv->netdev) &&
3971               mlx5e_eswitch_uplink_rep(out_dev)))
3972                 return 0;
3973
3974         if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev,
3975                                         termination_table_raw_traffic)) {
3976                 NL_SET_ERR_MSG_MOD(extack,
3977                                    "devices are both uplink, can't offload forwarding");
3978                         pr_err("devices %s %s are both uplink, can't offload forwarding\n",
3979                                priv->netdev->name, out_dev->name);
3980                         return -EOPNOTSUPP;
3981         } else if (out_dev != rep_priv->netdev) {
3982                 NL_SET_ERR_MSG_MOD(extack,
3983                                    "devices are not the same uplink, can't offload forwarding");
3984                 pr_err("devices %s %s are both uplink but not the same, can't offload forwarding\n",
3985                        priv->netdev->name, out_dev->name);
3986                 return -EOPNOTSUPP;
3987         }
3988         return 0;
3989 }
3990
3991 static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
3992                                 struct flow_action *flow_action,
3993                                 struct mlx5e_tc_flow *flow,
3994                                 struct netlink_ext_ack *extack,
3995                                 struct net_device *filter_dev)
3996 {
3997         struct pedit_headers_action hdrs[2] = {};
3998         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3999         struct mlx5_esw_flow_attr *attr = flow->esw_attr;
4000         struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
4001         struct mlx5e_rep_priv *rpriv = priv->ppriv;
4002         const struct ip_tunnel_info *info = NULL;
4003         int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS];
4004         bool ft_flow = mlx5e_is_ft_flow(flow);
4005         const struct flow_action_entry *act;
4006         bool encap = false, decap = false;
4007         u32 action = attr->action;
4008         int err, i, if_count = 0;
4009         bool mpls_push = false;
4010
4011         if (!flow_action_has_entries(flow_action))
4012                 return -EINVAL;
4013
4014         if (!flow_action_hw_stats_check(flow_action, extack,
4015                                         FLOW_ACTION_HW_STATS_DELAYED_BIT))
4016                 return -EOPNOTSUPP;
4017
4018         flow_action_for_each(i, act, flow_action) {
4019                 switch (act->id) {
4020                 case FLOW_ACTION_DROP:
4021                         action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
4022                                   MLX5_FLOW_CONTEXT_ACTION_COUNT;
4023                         break;
4024                 case FLOW_ACTION_MPLS_PUSH:
4025                         if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
4026                                                         reformat_l2_to_l3_tunnel) ||
4027                             act->mpls_push.proto != htons(ETH_P_MPLS_UC)) {
4028                                 NL_SET_ERR_MSG_MOD(extack,
4029                                                    "mpls push is supported only for mpls_uc protocol");
4030                                 return -EOPNOTSUPP;
4031                         }
4032                         mpls_push = true;
4033                         break;
4034                 case FLOW_ACTION_MPLS_POP:
4035                         /* we only support mpls pop if it is the first action
4036                          * and the filter net device is bareudp. Subsequent
4037                          * actions can be pedit and the last can be mirred
4038                          * egress redirect.
4039                          */
4040                         if (i) {
4041                                 NL_SET_ERR_MSG_MOD(extack,
4042                                                    "mpls pop supported only as first action");
4043                                 return -EOPNOTSUPP;
4044                         }
4045                         if (!netif_is_bareudp(filter_dev)) {
4046                                 NL_SET_ERR_MSG_MOD(extack,
4047                                                    "mpls pop supported only on bareudp devices");
4048                                 return -EOPNOTSUPP;
4049                         }
4050
4051                         parse_attr->eth.h_proto = act->mpls_pop.proto;
4052                         action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
4053                         flow_flag_set(flow, L3_TO_L2_DECAP);
4054                         break;
4055                 case FLOW_ACTION_MANGLE:
4056                 case FLOW_ACTION_ADD:
4057                         err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_FDB,
4058                                                     parse_attr, hdrs, flow, extack);
4059                         if (err)
4060                                 return err;
4061
4062                         if (!flow_flag_test(flow, L3_TO_L2_DECAP)) {
4063                                 action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
4064                                 attr->split_count = attr->out_count;
4065                         }
4066                         break;
4067                 case FLOW_ACTION_CSUM:
4068                         if (csum_offload_supported(priv, action,
4069                                                    act->csum_flags, extack))
4070                                 break;
4071
4072                         return -EOPNOTSUPP;
4073                 case FLOW_ACTION_REDIRECT:
4074                 case FLOW_ACTION_MIRRED: {
4075                         struct mlx5e_priv *out_priv;
4076                         struct net_device *out_dev;
4077
4078                         out_dev = act->dev;
4079                         if (!out_dev) {
4080                                 /* out_dev is NULL when filters with
4081                                  * non-existing mirred device are replayed to
4082                                  * the driver.
4083                                  */
4084                                 return -EINVAL;
4085                         }
4086
4087                         if (mpls_push && !netif_is_bareudp(out_dev)) {
4088                                 NL_SET_ERR_MSG_MOD(extack,
4089                                                    "mpls is supported only through a bareudp device");
4090                                 return -EOPNOTSUPP;
4091                         }
4092
4093                         if (ft_flow && out_dev == priv->netdev) {
4094                                 /* Ignore forward to self rules generated
4095                                  * by adding both mlx5 devs to the flow table
4096                                  * block on a normal nft offload setup.
4097                                  */
4098                                 return -EOPNOTSUPP;
4099                         }
4100
4101                         if (attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) {
4102                                 NL_SET_ERR_MSG_MOD(extack,
4103                                                    "can't support more output ports, can't offload forwarding");
4104                                 netdev_warn(priv->netdev,
4105                                             "can't support more than %d output ports, can't offload forwarding\n",
4106                                             attr->out_count);
4107                                 return -EOPNOTSUPP;
4108                         }
4109
4110                         action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
4111                                   MLX5_FLOW_CONTEXT_ACTION_COUNT;
4112                         if (encap) {
4113                                 parse_attr->mirred_ifindex[attr->out_count] =
4114                                         out_dev->ifindex;
4115                                 parse_attr->tun_info[attr->out_count] = dup_tun_info(info);
4116                                 if (!parse_attr->tun_info[attr->out_count])
4117                                         return -ENOMEM;
4118                                 encap = false;
4119                                 attr->dests[attr->out_count].flags |=
4120                                         MLX5_ESW_DEST_ENCAP;
4121                                 attr->out_count++;
4122                                 /* attr->dests[].rep is resolved when we
4123                                  * handle encap
4124                                  */
4125                         } else if (netdev_port_same_parent_id(priv->netdev, out_dev)) {
4126                                 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4127                                 struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
4128
4129                                 if (is_duplicated_output_device(priv->netdev,
4130                                                                 out_dev,
4131                                                                 ifindexes,
4132                                                                 if_count,
4133                                                                 extack))
4134                                         return -EOPNOTSUPP;
4135
4136                                 ifindexes[if_count] = out_dev->ifindex;
4137                                 if_count++;
4138
4139                                 out_dev = get_fdb_out_dev(uplink_dev, out_dev);
4140                                 if (!out_dev)
4141                                         return -ENODEV;
4142
4143                                 if (is_vlan_dev(out_dev)) {
4144                                         err = add_vlan_push_action(priv, attr,
4145                                                                    &out_dev,
4146                                                                    &action);
4147                                         if (err)
4148                                                 return err;
4149                                 }
4150
4151                                 if (is_vlan_dev(parse_attr->filter_dev)) {
4152                                         err = add_vlan_pop_action(priv, attr,
4153                                                                   &action);
4154                                         if (err)
4155                                                 return err;
4156                                 }
4157
4158                                 err = verify_uplink_forwarding(priv, flow, out_dev, extack);
4159                                 if (err)
4160                                         return err;
4161
4162                                 if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) {
4163                                         NL_SET_ERR_MSG_MOD(extack,
4164                                                            "devices are not on same switch HW, can't offload forwarding");
4165                                         netdev_warn(priv->netdev,
4166                                                     "devices %s %s not on same switch HW, can't offload forwarding\n",
4167                                                     priv->netdev->name,
4168                                                     out_dev->name);
4169                                         return -EOPNOTSUPP;
4170                                 }
4171
4172                                 out_priv = netdev_priv(out_dev);
4173                                 rpriv = out_priv->ppriv;
4174                                 attr->dests[attr->out_count].rep = rpriv->rep;
4175                                 attr->dests[attr->out_count].mdev = out_priv->mdev;
4176                                 attr->out_count++;
4177                         } else if (parse_attr->filter_dev != priv->netdev) {
4178                                 /* All mlx5 devices are called to configure
4179                                  * high level device filters. Therefore, the
4180                                  * *attempt* to  install a filter on invalid
4181                                  * eswitch should not trigger an explicit error
4182                                  */
4183                                 return -EINVAL;
4184                         } else {
4185                                 NL_SET_ERR_MSG_MOD(extack,
4186                                                    "devices are not on same switch HW, can't offload forwarding");
4187                                 netdev_warn(priv->netdev,
4188                                             "devices %s %s not on same switch HW, can't offload forwarding\n",
4189                                             priv->netdev->name,
4190                                             out_dev->name);
4191                                 return -EINVAL;
4192                         }
4193                         }
4194                         break;
4195                 case FLOW_ACTION_TUNNEL_ENCAP:
4196                         info = act->tunnel;
4197                         if (info)
4198                                 encap = true;
4199                         else
4200                                 return -EOPNOTSUPP;
4201
4202                         break;
4203                 case FLOW_ACTION_VLAN_PUSH:
4204                 case FLOW_ACTION_VLAN_POP:
4205                         if (act->id == FLOW_ACTION_VLAN_PUSH &&
4206                             (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)) {
4207                                 /* Replace vlan pop+push with vlan modify */
4208                                 action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
4209                                 err = add_vlan_rewrite_action(priv,
4210                                                               MLX5_FLOW_NAMESPACE_FDB,
4211                                                               act, parse_attr, hdrs,
4212                                                               &action, extack);
4213                         } else {
4214                                 err = parse_tc_vlan_action(priv, act, attr, &action);
4215                         }
4216                         if (err)
4217                                 return err;
4218
4219                         attr->split_count = attr->out_count;
4220                         break;
4221                 case FLOW_ACTION_VLAN_MANGLE:
4222                         err = add_vlan_rewrite_action(priv,
4223                                                       MLX5_FLOW_NAMESPACE_FDB,
4224                                                       act, parse_attr, hdrs,
4225                                                       &action, extack);
4226                         if (err)
4227                                 return err;
4228
4229                         attr->split_count = attr->out_count;
4230                         break;
4231                 case FLOW_ACTION_TUNNEL_DECAP:
4232                         decap = true;
4233                         break;
4234                 case FLOW_ACTION_GOTO:
4235                         err = mlx5_validate_goto_chain(esw, flow, act, action,
4236                                                        extack);
4237                         if (err)
4238                                 return err;
4239
4240                         action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
4241                         attr->dest_chain = act->chain_index;
4242                         break;
4243                 case FLOW_ACTION_CT:
4244                         err = mlx5_tc_ct_parse_action(priv, attr, act, extack);
4245                         if (err)
4246                                 return err;
4247
4248                         flow_flag_set(flow, CT);
4249                         break;
4250                 default:
4251                         NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
4252                         return -EOPNOTSUPP;
4253                 }
4254         }
4255
4256         if (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
4257             action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) {
4258                 /* For prio tag mode, replace vlan pop with rewrite vlan prio
4259                  * tag rewrite.
4260                  */
4261                 action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
4262                 err = add_vlan_prio_tag_rewrite_action(priv, parse_attr, hdrs,
4263                                                        &action, extack);
4264                 if (err)
4265                         return err;
4266         }
4267
4268         if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits ||
4269             hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) {
4270                 err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_FDB,
4271                                             parse_attr, hdrs, &action, extack);
4272                 if (err)
4273                         return err;
4274                 /* in case all pedit actions are skipped, remove the MOD_HDR
4275                  * flag. we might have set split_count either by pedit or
4276                  * pop/push. if there is no pop/push either, reset it too.
4277                  */
4278                 if (parse_attr->mod_hdr_acts.num_actions == 0) {
4279                         action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
4280                         dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
4281                         if (!((action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
4282                               (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)))
4283                                 attr->split_count = 0;
4284                 }
4285         }
4286
4287         attr->action = action;
4288         if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack))
4289                 return -EOPNOTSUPP;
4290
4291         if (attr->dest_chain) {
4292                 if (decap) {
4293                         /* It can be supported if we'll create a mapping for
4294                          * the tunnel device only (without tunnel), and set
4295                          * this tunnel id with this decap flow.
4296                          *
4297                          * On restore (miss), we'll just set this saved tunnel
4298                          * device.
4299                          */
4300
4301                         NL_SET_ERR_MSG(extack,
4302                                        "Decap with goto isn't supported");
4303                         netdev_warn(priv->netdev,
4304                                     "Decap with goto isn't supported");
4305                         return -EOPNOTSUPP;
4306                 }
4307
4308                 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
4309                         NL_SET_ERR_MSG_MOD(extack,
4310                                            "Mirroring goto chain rules isn't supported");
4311                         return -EOPNOTSUPP;
4312                 }
4313                 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
4314         }
4315
4316         if (!(attr->action &
4317               (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
4318                 NL_SET_ERR_MSG_MOD(extack,
4319                                    "Rule must have at least one forward/drop action");
4320                 return -EOPNOTSUPP;
4321         }
4322
4323         if (attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
4324                 NL_SET_ERR_MSG_MOD(extack,
4325                                    "current firmware doesn't support split rule for port mirroring");
4326                 netdev_warn_once(priv->netdev, "current firmware doesn't support split rule for port mirroring\n");
4327                 return -EOPNOTSUPP;
4328         }
4329
4330         return 0;
4331 }
4332
4333 static void get_flags(int flags, unsigned long *flow_flags)
4334 {
4335         unsigned long __flow_flags = 0;
4336
4337         if (flags & MLX5_TC_FLAG(INGRESS))
4338                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS);
4339         if (flags & MLX5_TC_FLAG(EGRESS))
4340                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS);
4341
4342         if (flags & MLX5_TC_FLAG(ESW_OFFLOAD))
4343                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
4344         if (flags & MLX5_TC_FLAG(NIC_OFFLOAD))
4345                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
4346         if (flags & MLX5_TC_FLAG(FT_OFFLOAD))
4347                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_FT);
4348
4349         *flow_flags = __flow_flags;
4350 }
4351
4352 static const struct rhashtable_params tc_ht_params = {
4353         .head_offset = offsetof(struct mlx5e_tc_flow, node),
4354         .key_offset = offsetof(struct mlx5e_tc_flow, cookie),
4355         .key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie),
4356         .automatic_shrinking = true,
4357 };
4358
4359 static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv,
4360                                     unsigned long flags)
4361 {
4362         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4363         struct mlx5e_rep_priv *uplink_rpriv;
4364
4365         if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) {
4366                 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
4367                 return &uplink_rpriv->uplink_priv.tc_ht;
4368         } else /* NIC offload */
4369                 return &priv->fs.tc.ht;
4370 }
4371
4372 static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
4373 {
4374         struct mlx5_esw_flow_attr *attr = flow->esw_attr;
4375         bool is_rep_ingress = attr->in_rep->vport != MLX5_VPORT_UPLINK &&
4376                 flow_flag_test(flow, INGRESS);
4377         bool act_is_encap = !!(attr->action &
4378                                MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT);
4379         bool esw_paired = mlx5_devcom_is_paired(attr->in_mdev->priv.devcom,
4380                                                 MLX5_DEVCOM_ESW_OFFLOADS);
4381
4382         if (!esw_paired)
4383                 return false;
4384
4385         if ((mlx5_lag_is_sriov(attr->in_mdev) ||
4386              mlx5_lag_is_multipath(attr->in_mdev)) &&
4387             (is_rep_ingress || act_is_encap))
4388                 return true;
4389
4390         return false;
4391 }
4392
4393 static int
4394 mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
4395                  struct flow_cls_offload *f, unsigned long flow_flags,
4396                  struct mlx5e_tc_flow_parse_attr **__parse_attr,
4397                  struct mlx5e_tc_flow **__flow)
4398 {
4399         struct mlx5e_tc_flow_parse_attr *parse_attr;
4400         struct mlx5e_tc_flow *flow;
4401         int out_index, err;
4402
4403         flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL);
4404         parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
4405         if (!parse_attr || !flow) {
4406                 err = -ENOMEM;
4407                 goto err_free;
4408         }
4409
4410         flow->cookie = f->cookie;
4411         flow->flags = flow_flags;
4412         flow->priv = priv;
4413         for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
4414                 INIT_LIST_HEAD(&flow->encaps[out_index].list);
4415         INIT_LIST_HEAD(&flow->mod_hdr);
4416         INIT_LIST_HEAD(&flow->hairpin);
4417         INIT_LIST_HEAD(&flow->l3_to_l2_reformat);
4418         refcount_set(&flow->refcnt, 1);
4419         init_completion(&flow->init_done);
4420
4421         *__flow = flow;
4422         *__parse_attr = parse_attr;
4423
4424         return 0;
4425
4426 err_free:
4427         kfree(flow);
4428         kvfree(parse_attr);
4429         return err;
4430 }
4431
4432 static void
4433 mlx5e_flow_esw_attr_init(struct mlx5_esw_flow_attr *esw_attr,
4434                          struct mlx5e_priv *priv,
4435                          struct mlx5e_tc_flow_parse_attr *parse_attr,
4436                          struct flow_cls_offload *f,
4437                          struct mlx5_eswitch_rep *in_rep,
4438                          struct mlx5_core_dev *in_mdev)
4439 {
4440         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4441
4442         esw_attr->parse_attr = parse_attr;
4443         esw_attr->chain = f->common.chain_index;
4444         esw_attr->prio = f->common.prio;
4445
4446         esw_attr->in_rep = in_rep;
4447         esw_attr->in_mdev = in_mdev;
4448
4449         if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) ==
4450             MLX5_COUNTER_SOURCE_ESWITCH)
4451                 esw_attr->counter_dev = in_mdev;
4452         else
4453                 esw_attr->counter_dev = priv->mdev;
4454 }
4455
4456 static struct mlx5e_tc_flow *
4457 __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
4458                      struct flow_cls_offload *f,
4459                      unsigned long flow_flags,
4460                      struct net_device *filter_dev,
4461                      struct mlx5_eswitch_rep *in_rep,
4462                      struct mlx5_core_dev *in_mdev)
4463 {
4464         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
4465         struct netlink_ext_ack *extack = f->common.extack;
4466         struct mlx5e_tc_flow_parse_attr *parse_attr;
4467         struct mlx5e_tc_flow *flow;
4468         int attr_size, err;
4469
4470         flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
4471         attr_size  = sizeof(struct mlx5_esw_flow_attr);
4472         err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
4473                                &parse_attr, &flow);
4474         if (err)
4475                 goto out;
4476
4477         parse_attr->filter_dev = filter_dev;
4478         mlx5e_flow_esw_attr_init(flow->esw_attr,
4479                                  priv, parse_attr,
4480                                  f, in_rep, in_mdev);
4481
4482         err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
4483                                f, filter_dev);
4484         if (err)
4485                 goto err_free;
4486
4487         err = parse_tc_fdb_actions(priv, &rule->action, flow, extack, filter_dev);
4488         if (err)
4489                 goto err_free;
4490
4491         err = mlx5_tc_ct_parse_match(priv, &parse_attr->spec, f, extack);
4492         if (err)
4493                 goto err_free;
4494
4495         err = mlx5e_tc_add_fdb_flow(priv, flow, extack);
4496         complete_all(&flow->init_done);
4497         if (err) {
4498                 if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev)))
4499                         goto err_free;
4500
4501                 add_unready_flow(flow);
4502         }
4503
4504         return flow;
4505
4506 err_free:
4507         mlx5e_flow_put(priv, flow);
4508 out:
4509         return ERR_PTR(err);
4510 }
4511
4512 static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
4513                                       struct mlx5e_tc_flow *flow,
4514                                       unsigned long flow_flags)
4515 {
4516         struct mlx5e_priv *priv = flow->priv, *peer_priv;
4517         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
4518         struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
4519         struct mlx5e_tc_flow_parse_attr *parse_attr;
4520         struct mlx5e_rep_priv *peer_urpriv;
4521         struct mlx5e_tc_flow *peer_flow;
4522         struct mlx5_core_dev *in_mdev;
4523         int err = 0;
4524
4525         peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4526         if (!peer_esw)
4527                 return -ENODEV;
4528
4529         peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH);
4530         peer_priv = netdev_priv(peer_urpriv->netdev);
4531
4532         /* in_mdev is assigned of which the packet originated from.
4533          * So packets redirected to uplink use the same mdev of the
4534          * original flow and packets redirected from uplink use the
4535          * peer mdev.
4536          */
4537         if (flow->esw_attr->in_rep->vport == MLX5_VPORT_UPLINK)
4538                 in_mdev = peer_priv->mdev;
4539         else
4540                 in_mdev = priv->mdev;
4541
4542         parse_attr = flow->esw_attr->parse_attr;
4543         peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags,
4544                                          parse_attr->filter_dev,
4545                                          flow->esw_attr->in_rep, in_mdev);
4546         if (IS_ERR(peer_flow)) {
4547                 err = PTR_ERR(peer_flow);
4548                 goto out;
4549         }
4550
4551         flow->peer_flow = peer_flow;
4552         flow_flag_set(flow, DUP);
4553         mutex_lock(&esw->offloads.peer_mutex);
4554         list_add_tail(&flow->peer, &esw->offloads.peer_flows);
4555         mutex_unlock(&esw->offloads.peer_mutex);
4556
4557 out:
4558         mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4559         return err;
4560 }
4561
4562 static int
4563 mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
4564                    struct flow_cls_offload *f,
4565                    unsigned long flow_flags,
4566                    struct net_device *filter_dev,
4567                    struct mlx5e_tc_flow **__flow)
4568 {
4569         struct mlx5e_rep_priv *rpriv = priv->ppriv;
4570         struct mlx5_eswitch_rep *in_rep = rpriv->rep;
4571         struct mlx5_core_dev *in_mdev = priv->mdev;
4572         struct mlx5e_tc_flow *flow;
4573         int err;
4574
4575         flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep,
4576                                     in_mdev);
4577         if (IS_ERR(flow))
4578                 return PTR_ERR(flow);
4579
4580         if (is_peer_flow_needed(flow)) {
4581                 err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags);
4582                 if (err) {
4583                         mlx5e_tc_del_fdb_flow(priv, flow);
4584                         goto out;
4585                 }
4586         }
4587
4588         *__flow = flow;
4589
4590         return 0;
4591
4592 out:
4593         return err;
4594 }
4595
4596 static int
4597 mlx5e_add_nic_flow(struct mlx5e_priv *priv,
4598                    struct flow_cls_offload *f,
4599                    unsigned long flow_flags,
4600                    struct net_device *filter_dev,
4601                    struct mlx5e_tc_flow **__flow)
4602 {
4603         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
4604         struct netlink_ext_ack *extack = f->common.extack;
4605         struct mlx5e_tc_flow_parse_attr *parse_attr;
4606         struct mlx5e_tc_flow *flow;
4607         int attr_size, err;
4608
4609         /* multi-chain not supported for NIC rules */
4610         if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common))
4611                 return -EOPNOTSUPP;
4612
4613         flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
4614         attr_size  = sizeof(struct mlx5_nic_flow_attr);
4615         err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
4616                                &parse_attr, &flow);
4617         if (err)
4618                 goto out;
4619
4620         parse_attr->filter_dev = filter_dev;
4621         err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
4622                                f, filter_dev);
4623         if (err)
4624                 goto err_free;
4625
4626         err = parse_tc_nic_actions(priv, &rule->action, parse_attr, flow, extack);
4627         if (err)
4628                 goto err_free;
4629
4630         err = mlx5e_tc_add_nic_flow(priv, parse_attr, flow, extack);
4631         if (err)
4632                 goto err_free;
4633
4634         flow_flag_set(flow, OFFLOADED);
4635         kvfree(parse_attr);
4636         *__flow = flow;
4637
4638         return 0;
4639
4640 err_free:
4641         mlx5e_flow_put(priv, flow);
4642         kvfree(parse_attr);
4643 out:
4644         return err;
4645 }
4646
4647 static int
4648 mlx5e_tc_add_flow(struct mlx5e_priv *priv,
4649                   struct flow_cls_offload *f,
4650                   unsigned long flags,
4651                   struct net_device *filter_dev,
4652                   struct mlx5e_tc_flow **flow)
4653 {
4654         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4655         unsigned long flow_flags;
4656         int err;
4657
4658         get_flags(flags, &flow_flags);
4659
4660         if (!tc_can_offload_extack(priv->netdev, f->common.extack))
4661                 return -EOPNOTSUPP;
4662
4663         if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
4664                 err = mlx5e_add_fdb_flow(priv, f, flow_flags,
4665                                          filter_dev, flow);
4666         else
4667                 err = mlx5e_add_nic_flow(priv, f, flow_flags,
4668                                          filter_dev, flow);
4669
4670         return err;
4671 }
4672
4673 static bool is_flow_rule_duplicate_allowed(struct net_device *dev,
4674                                            struct mlx5e_rep_priv *rpriv)
4675 {
4676         /* Offloaded flow rule is allowed to duplicate on non-uplink representor
4677          * sharing tc block with other slaves of a lag device.
4678          */
4679         return netif_is_lag_port(dev) && rpriv->rep->vport != MLX5_VPORT_UPLINK;
4680 }
4681
4682 int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
4683                            struct flow_cls_offload *f, unsigned long flags)
4684 {
4685         struct netlink_ext_ack *extack = f->common.extack;
4686         struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4687         struct mlx5e_rep_priv *rpriv = priv->ppriv;
4688         struct mlx5e_tc_flow *flow;
4689         int err = 0;
4690
4691         rcu_read_lock();
4692         flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
4693         rcu_read_unlock();
4694         if (flow) {
4695                 /* Same flow rule offloaded to non-uplink representor sharing tc block,
4696                  * just return 0.
4697                  */
4698                 if (is_flow_rule_duplicate_allowed(dev, rpriv) && flow->orig_dev != dev)
4699                         goto out;
4700
4701                 NL_SET_ERR_MSG_MOD(extack,
4702                                    "flow cookie already exists, ignoring");
4703                 netdev_warn_once(priv->netdev,
4704                                  "flow cookie %lx already exists, ignoring\n",
4705                                  f->cookie);
4706                 err = -EEXIST;
4707                 goto out;
4708         }
4709
4710         trace_mlx5e_configure_flower(f);
4711         err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow);
4712         if (err)
4713                 goto out;
4714
4715         /* Flow rule offloaded to non-uplink representor sharing tc block,
4716          * set the flow's owner dev.
4717          */
4718         if (is_flow_rule_duplicate_allowed(dev, rpriv))
4719                 flow->orig_dev = dev;
4720
4721         err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params);
4722         if (err)
4723                 goto err_free;
4724
4725         return 0;
4726
4727 err_free:
4728         mlx5e_flow_put(priv, flow);
4729 out:
4730         return err;
4731 }
4732
4733 static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags)
4734 {
4735         bool dir_ingress = !!(flags & MLX5_TC_FLAG(INGRESS));
4736         bool dir_egress = !!(flags & MLX5_TC_FLAG(EGRESS));
4737
4738         return flow_flag_test(flow, INGRESS) == dir_ingress &&
4739                 flow_flag_test(flow, EGRESS) == dir_egress;
4740 }
4741
4742 int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
4743                         struct flow_cls_offload *f, unsigned long flags)
4744 {
4745         struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4746         struct mlx5e_tc_flow *flow;
4747         int err;
4748
4749         rcu_read_lock();
4750         flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
4751         if (!flow || !same_flow_direction(flow, flags)) {
4752                 err = -EINVAL;
4753                 goto errout;
4754         }
4755
4756         /* Only delete the flow if it doesn't have MLX5E_TC_FLOW_DELETED flag
4757          * set.
4758          */
4759         if (flow_flag_test_and_set(flow, DELETED)) {
4760                 err = -EINVAL;
4761                 goto errout;
4762         }
4763         rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params);
4764         rcu_read_unlock();
4765
4766         trace_mlx5e_delete_flower(f);
4767         mlx5e_flow_put(priv, flow);
4768
4769         return 0;
4770
4771 errout:
4772         rcu_read_unlock();
4773         return err;
4774 }
4775
4776 int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
4777                        struct flow_cls_offload *f, unsigned long flags)
4778 {
4779         struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
4780         struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4781         struct mlx5_eswitch *peer_esw;
4782         struct mlx5e_tc_flow *flow;
4783         struct mlx5_fc *counter;
4784         u64 lastuse = 0;
4785         u64 packets = 0;
4786         u64 bytes = 0;
4787         int err = 0;
4788
4789         rcu_read_lock();
4790         flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie,
4791                                                 tc_ht_params));
4792         rcu_read_unlock();
4793         if (IS_ERR(flow))
4794                 return PTR_ERR(flow);
4795
4796         if (!same_flow_direction(flow, flags)) {
4797                 err = -EINVAL;
4798                 goto errout;
4799         }
4800
4801         if (mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, CT)) {
4802                 counter = mlx5e_tc_get_counter(flow);
4803                 if (!counter)
4804                         goto errout;
4805
4806                 mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
4807         }
4808
4809         /* Under multipath it's possible for one rule to be currently
4810          * un-offloaded while the other rule is offloaded.
4811          */
4812         peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4813         if (!peer_esw)
4814                 goto out;
4815
4816         if (flow_flag_test(flow, DUP) &&
4817             flow_flag_test(flow->peer_flow, OFFLOADED)) {
4818                 u64 bytes2;
4819                 u64 packets2;
4820                 u64 lastuse2;
4821
4822                 counter = mlx5e_tc_get_counter(flow->peer_flow);
4823                 if (!counter)
4824                         goto no_peer_counter;
4825                 mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2);
4826
4827                 bytes += bytes2;
4828                 packets += packets2;
4829                 lastuse = max_t(u64, lastuse, lastuse2);
4830         }
4831
4832 no_peer_counter:
4833         mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4834 out:
4835         flow_stats_update(&f->stats, bytes, packets, lastuse,
4836                           FLOW_ACTION_HW_STATS_DELAYED);
4837         trace_mlx5e_stats_flower(f);
4838 errout:
4839         mlx5e_flow_put(priv, flow);
4840         return err;
4841 }
4842
4843 static int apply_police_params(struct mlx5e_priv *priv, u32 rate,
4844                                struct netlink_ext_ack *extack)
4845 {
4846         struct mlx5e_rep_priv *rpriv = priv->ppriv;
4847         struct mlx5_eswitch *esw;
4848         u16 vport_num;
4849         u32 rate_mbps;
4850         int err;
4851
4852         vport_num = rpriv->rep->vport;
4853         if (vport_num >= MLX5_VPORT_ECPF) {
4854                 NL_SET_ERR_MSG_MOD(extack,
4855                                    "Ingress rate limit is supported only for Eswitch ports connected to VFs");
4856                 return -EOPNOTSUPP;
4857         }
4858
4859         esw = priv->mdev->priv.eswitch;
4860         /* rate is given in bytes/sec.
4861          * First convert to bits/sec and then round to the nearest mbit/secs.
4862          * mbit means million bits.
4863          * Moreover, if rate is non zero we choose to configure to a minimum of
4864          * 1 mbit/sec.
4865          */
4866         rate_mbps = rate ? max_t(u32, (rate * 8 + 500000) / 1000000, 1) : 0;
4867         err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps);
4868         if (err)
4869                 NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware");
4870
4871         return err;
4872 }
4873
4874 static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
4875                                         struct flow_action *flow_action,
4876                                         struct netlink_ext_ack *extack)
4877 {
4878         struct mlx5e_rep_priv *rpriv = priv->ppriv;
4879         const struct flow_action_entry *act;
4880         int err;
4881         int i;
4882
4883         if (!flow_action_has_entries(flow_action)) {
4884                 NL_SET_ERR_MSG_MOD(extack, "matchall called with no action");
4885                 return -EINVAL;
4886         }
4887
4888         if (!flow_offload_has_one_action(flow_action)) {
4889                 NL_SET_ERR_MSG_MOD(extack, "matchall policing support only a single action");
4890                 return -EOPNOTSUPP;
4891         }
4892
4893         if (!flow_action_basic_hw_stats_check(flow_action, extack))
4894                 return -EOPNOTSUPP;
4895
4896         flow_action_for_each(i, act, flow_action) {
4897                 switch (act->id) {
4898                 case FLOW_ACTION_POLICE:
4899                         err = apply_police_params(priv, act->police.rate_bytes_ps, extack);
4900                         if (err)
4901                                 return err;
4902
4903                         rpriv->prev_vf_vport_stats = priv->stats.vf_vport;
4904                         break;
4905                 default:
4906                         NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall");
4907                         return -EOPNOTSUPP;
4908                 }
4909         }
4910
4911         return 0;
4912 }
4913
4914 int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv,
4915                                 struct tc_cls_matchall_offload *ma)
4916 {
4917         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4918         struct netlink_ext_ack *extack = ma->common.extack;
4919
4920         if (!mlx5_esw_qos_enabled(esw)) {
4921                 NL_SET_ERR_MSG_MOD(extack, "QoS is not supported on this device");
4922                 return -EOPNOTSUPP;
4923         }
4924
4925         if (ma->common.prio != 1) {
4926                 NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported");
4927                 return -EINVAL;
4928         }
4929
4930         return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack);
4931 }
4932
4933 int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv,
4934                              struct tc_cls_matchall_offload *ma)
4935 {
4936         struct netlink_ext_ack *extack = ma->common.extack;
4937
4938         return apply_police_params(priv, 0, extack);
4939 }
4940
4941 void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv,
4942                              struct tc_cls_matchall_offload *ma)
4943 {
4944         struct mlx5e_rep_priv *rpriv = priv->ppriv;
4945         struct rtnl_link_stats64 cur_stats;
4946         u64 dbytes;
4947         u64 dpkts;
4948
4949         cur_stats = priv->stats.vf_vport;
4950         dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets;
4951         dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes;
4952         rpriv->prev_vf_vport_stats = cur_stats;
4953         flow_stats_update(&ma->stats, dpkts, dbytes, jiffies,
4954                           FLOW_ACTION_HW_STATS_DELAYED);
4955 }
4956
4957 static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
4958                                               struct mlx5e_priv *peer_priv)
4959 {
4960         struct mlx5_core_dev *peer_mdev = peer_priv->mdev;
4961         struct mlx5e_hairpin_entry *hpe, *tmp;
4962         LIST_HEAD(init_wait_list);
4963         u16 peer_vhca_id;
4964         int bkt;
4965
4966         if (!same_hw_devs(priv, peer_priv))
4967                 return;
4968
4969         peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
4970
4971         mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
4972         hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist)
4973                 if (refcount_inc_not_zero(&hpe->refcnt))
4974                         list_add(&hpe->dead_peer_wait_list, &init_wait_list);
4975         mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
4976
4977         list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) {
4978                 wait_for_completion(&hpe->res_ready);
4979                 if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id)
4980                         hpe->hp->pair->peer_gone = true;
4981
4982                 mlx5e_hairpin_put(priv, hpe);
4983         }
4984 }
4985
4986 static int mlx5e_tc_netdev_event(struct notifier_block *this,
4987                                  unsigned long event, void *ptr)
4988 {
4989         struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
4990         struct mlx5e_flow_steering *fs;
4991         struct mlx5e_priv *peer_priv;
4992         struct mlx5e_tc_table *tc;
4993         struct mlx5e_priv *priv;
4994
4995         if (ndev->netdev_ops != &mlx5e_netdev_ops ||
4996             event != NETDEV_UNREGISTER ||
4997             ndev->reg_state == NETREG_REGISTERED)
4998                 return NOTIFY_DONE;
4999
5000         tc = container_of(this, struct mlx5e_tc_table, netdevice_nb);
5001         fs = container_of(tc, struct mlx5e_flow_steering, tc);
5002         priv = container_of(fs, struct mlx5e_priv, fs);
5003         peer_priv = netdev_priv(ndev);
5004         if (priv == peer_priv ||
5005             !(priv->netdev->features & NETIF_F_HW_TC))
5006                 return NOTIFY_DONE;
5007
5008         mlx5e_tc_hairpin_update_dead_peer(priv, peer_priv);
5009
5010         return NOTIFY_DONE;
5011 }
5012
5013 int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
5014 {
5015         struct mlx5e_tc_table *tc = &priv->fs.tc;
5016         int err;
5017
5018         mutex_init(&tc->t_lock);
5019         mutex_init(&tc->mod_hdr.lock);
5020         hash_init(tc->mod_hdr.hlist);
5021         mutex_init(&tc->hairpin_tbl_lock);
5022         hash_init(tc->hairpin_tbl);
5023
5024         err = rhashtable_init(&tc->ht, &tc_ht_params);
5025         if (err)
5026                 return err;
5027
5028         tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
5029         err = register_netdevice_notifier_dev_net(priv->netdev,
5030                                                   &tc->netdevice_nb,
5031                                                   &tc->netdevice_nn);
5032         if (err) {
5033                 tc->netdevice_nb.notifier_call = NULL;
5034                 mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n");
5035         }
5036
5037         return err;
5038 }
5039
5040 static void _mlx5e_tc_del_flow(void *ptr, void *arg)
5041 {
5042         struct mlx5e_tc_flow *flow = ptr;
5043         struct mlx5e_priv *priv = flow->priv;
5044
5045         mlx5e_tc_del_flow(priv, flow);
5046         kfree(flow);
5047 }
5048
5049 void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
5050 {
5051         struct mlx5e_tc_table *tc = &priv->fs.tc;
5052
5053         if (tc->netdevice_nb.notifier_call)
5054                 unregister_netdevice_notifier_dev_net(priv->netdev,
5055                                                       &tc->netdevice_nb,
5056                                                       &tc->netdevice_nn);
5057
5058         mutex_destroy(&tc->mod_hdr.lock);
5059         mutex_destroy(&tc->hairpin_tbl_lock);
5060
5061         rhashtable_destroy(&tc->ht);
5062
5063         if (!IS_ERR_OR_NULL(tc->t)) {
5064                 mlx5_destroy_flow_table(tc->t);
5065                 tc->t = NULL;
5066         }
5067         mutex_destroy(&tc->t_lock);
5068 }
5069
5070 int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
5071 {
5072         const size_t sz_enc_opts = sizeof(struct tunnel_match_enc_opts);
5073         struct mlx5_rep_uplink_priv *uplink_priv;
5074         struct mlx5e_rep_priv *priv;
5075         struct mapping_ctx *mapping;
5076         int err;
5077
5078         uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht);
5079         priv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
5080
5081         err = mlx5_tc_ct_init(uplink_priv);
5082         if (err)
5083                 goto err_ct;
5084
5085         mapping = mapping_create(sizeof(struct tunnel_match_key),
5086                                  TUNNEL_INFO_BITS_MASK, true);
5087         if (IS_ERR(mapping)) {
5088                 err = PTR_ERR(mapping);
5089                 goto err_tun_mapping;
5090         }
5091         uplink_priv->tunnel_mapping = mapping;
5092
5093         mapping = mapping_create(sz_enc_opts, ENC_OPTS_BITS_MASK, true);
5094         if (IS_ERR(mapping)) {
5095                 err = PTR_ERR(mapping);
5096                 goto err_enc_opts_mapping;
5097         }
5098         uplink_priv->tunnel_enc_opts_mapping = mapping;
5099
5100         err = rhashtable_init(tc_ht, &tc_ht_params);
5101         if (err)
5102                 goto err_ht_init;
5103
5104         return err;
5105
5106 err_ht_init:
5107         mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
5108 err_enc_opts_mapping:
5109         mapping_destroy(uplink_priv->tunnel_mapping);
5110 err_tun_mapping:
5111         mlx5_tc_ct_clean(uplink_priv);
5112 err_ct:
5113         netdev_warn(priv->netdev,
5114                     "Failed to initialize tc (eswitch), err: %d", err);
5115         return err;
5116 }
5117
5118 void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
5119 {
5120         struct mlx5_rep_uplink_priv *uplink_priv;
5121
5122         rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
5123
5124         uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht);
5125         mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
5126         mapping_destroy(uplink_priv->tunnel_mapping);
5127
5128         mlx5_tc_ct_clean(uplink_priv);
5129 }
5130
5131 int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
5132 {
5133         struct rhashtable *tc_ht = get_tc_ht(priv, flags);
5134
5135         return atomic_read(&tc_ht->nelems);
5136 }
5137
5138 void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
5139 {
5140         struct mlx5e_tc_flow *flow, *tmp;
5141
5142         list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer)
5143                 __mlx5e_tc_del_fdb_peer_flow(flow);
5144 }
5145
5146 void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
5147 {
5148         struct mlx5_rep_uplink_priv *rpriv =
5149                 container_of(work, struct mlx5_rep_uplink_priv,
5150                              reoffload_flows_work);
5151         struct mlx5e_tc_flow *flow, *tmp;
5152
5153         mutex_lock(&rpriv->unready_flows_lock);
5154         list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) {
5155                 if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL))
5156                         unready_flow_del(flow);
5157         }
5158         mutex_unlock(&rpriv->unready_flows_lock);
5159 }
5160
5161 static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
5162                                      struct flow_cls_offload *cls_flower,
5163                                      unsigned long flags)
5164 {
5165         switch (cls_flower->command) {
5166         case FLOW_CLS_REPLACE:
5167                 return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
5168                                               flags);
5169         case FLOW_CLS_DESTROY:
5170                 return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
5171                                            flags);
5172         case FLOW_CLS_STATS:
5173                 return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
5174                                           flags);
5175         default:
5176                 return -EOPNOTSUPP;
5177         }
5178 }
5179
5180 int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
5181                             void *cb_priv)
5182 {
5183         unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(NIC_OFFLOAD);
5184         struct mlx5e_priv *priv = cb_priv;
5185
5186         switch (type) {
5187         case TC_SETUP_CLSFLOWER:
5188                 return mlx5e_setup_tc_cls_flower(priv, type_data, flags);
5189         default:
5190                 return -EOPNOTSUPP;
5191         }
5192 }