1 /*
2  * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32
33 #include <net/flow_dissector.h>
34 #include <net/flow_offload.h>
35 #include <net/sch_generic.h>
36 #include <net/pkt_cls.h>
37 #include <net/tc_act/tc_gact.h>
38 #include <net/tc_act/tc_skbedit.h>
39 #include <linux/mlx5/fs.h>
40 #include <linux/mlx5/device.h>
41 #include <linux/rhashtable.h>
42 #include <linux/refcount.h>
43 #include <linux/completion.h>
44 #include <net/tc_act/tc_mirred.h>
45 #include <net/tc_act/tc_vlan.h>
46 #include <net/tc_act/tc_tunnel_key.h>
47 #include <net/tc_act/tc_pedit.h>
48 #include <net/tc_act/tc_csum.h>
49 #include <net/tc_act/tc_mpls.h>
50 #include <net/arp.h>
51 #include <net/ipv6_stubs.h>
52 #include <net/bareudp.h>
53 #include <net/bonding.h>
54 #include "en.h"
55 #include "en_rep.h"
56 #include "en/rep/tc.h"
57 #include "en/rep/neigh.h"
58 #include "en_tc.h"
59 #include "eswitch.h"
60 #include "fs_core.h"
61 #include "en/port.h"
62 #include "en/tc_tun.h"
63 #include "en/mapping.h"
64 #include "en/tc_ct.h"
65 #include "en/mod_hdr.h"
66 #include "lib/devcom.h"
67 #include "lib/geneve.h"
68 #include "lib/fs_chains.h"
69 #include "diag/en_tc_tracepoint.h"
70
71 #define nic_chains(priv) ((priv)->fs.tc.chains)
72 #define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)
73 #define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1)
74
75 enum {
76         MLX5E_TC_FLOW_FLAG_INGRESS      = MLX5E_TC_FLAG_INGRESS_BIT,
77         MLX5E_TC_FLOW_FLAG_EGRESS       = MLX5E_TC_FLAG_EGRESS_BIT,
78         MLX5E_TC_FLOW_FLAG_ESWITCH      = MLX5E_TC_FLAG_ESW_OFFLOAD_BIT,
79         MLX5E_TC_FLOW_FLAG_FT           = MLX5E_TC_FLAG_FT_OFFLOAD_BIT,
80         MLX5E_TC_FLOW_FLAG_NIC          = MLX5E_TC_FLAG_NIC_OFFLOAD_BIT,
81         MLX5E_TC_FLOW_FLAG_OFFLOADED    = MLX5E_TC_FLOW_BASE,
82         MLX5E_TC_FLOW_FLAG_HAIRPIN      = MLX5E_TC_FLOW_BASE + 1,
83         MLX5E_TC_FLOW_FLAG_HAIRPIN_RSS  = MLX5E_TC_FLOW_BASE + 2,
84         MLX5E_TC_FLOW_FLAG_SLOW         = MLX5E_TC_FLOW_BASE + 3,
85         MLX5E_TC_FLOW_FLAG_DUP          = MLX5E_TC_FLOW_BASE + 4,
86         MLX5E_TC_FLOW_FLAG_NOT_READY    = MLX5E_TC_FLOW_BASE + 5,
87         MLX5E_TC_FLOW_FLAG_DELETED      = MLX5E_TC_FLOW_BASE + 6,
88         MLX5E_TC_FLOW_FLAG_CT           = MLX5E_TC_FLOW_BASE + 7,
89         MLX5E_TC_FLOW_FLAG_L3_TO_L2_DECAP = MLX5E_TC_FLOW_BASE + 8,
90 };
91
92 #define MLX5E_TC_MAX_SPLITS 1
93
94 /* Helper struct for accessing a struct containing a list_head array.
95  * Containing struct
96  *   |- Helper array
97  *      [0] Helper item 0
98  *          |- list_head item 0
99  *          |- index (0)
100  *      [1] Helper item 1
101  *          |- list_head item 1
102  *          |- index (1)
103  * To access the containing struct from one of the list_head items:
104  * 1. Get the helper item from the list_head item using
105  *    helper item =
106  *        container_of(list_head item, helper struct type, list_head field)
107  * 2. Get the containing struct from the helper item and its index in the array:
108  *    containing struct =
109  *        container_of(helper item, containing struct type, helper field[index])
110  */
111 struct encap_flow_item {
112         struct mlx5e_encap_entry *e; /* attached encap instance */
113         struct list_head list;
114         int index;
115 };
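/* Illustrative sketch (not a helper defined in this file): given a list_head
 * pointer 'item' known to be the 'list' member of one of flow->encaps[], the
 * owning flow is recovered with the two container_of() steps described above:
 *
 *	struct encap_flow_item *efi;
 *	struct mlx5e_tc_flow *flow;
 *
 *	efi = container_of(item, struct encap_flow_item, list);
 *	flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
 */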
116
117 struct mlx5e_tc_flow {
118         struct rhash_head       node;
119         struct mlx5e_priv       *priv;
120         u64                     cookie;
121         unsigned long           flags;
122         struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1];
123
124         /* flows sharing the same reformat object - currently mpls decap */
125         struct list_head l3_to_l2_reformat;
126         struct mlx5e_decap_entry *decap_reformat;
127
128         /* Flow can be associated with multiple encap IDs.
129          * The number of encaps is bounded by the number of supported
130          * destinations.
131          */
132         struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
133         struct mlx5e_tc_flow    *peer_flow;
134         struct mlx5e_mod_hdr_handle *mh; /* attached mod header instance */
135         struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */
136         struct list_head        hairpin; /* flows sharing the same hairpin */
137         struct list_head        peer;    /* flows with peer flow */
138         struct list_head        unready; /* flows not ready to be offloaded (e.g. due to missing route) */
139         struct net_device       *orig_dev; /* netdev adding flow first */
140         int                     tmp_efi_index;
141         struct list_head        tmp_list; /* temporary flow list used by neigh update */
142         refcount_t              refcnt;
143         struct rcu_head         rcu_head;
144         struct completion       init_done;
145         int tunnel_id; /* the mapped tunnel id of this flow */
146         struct mlx5_flow_attr *attr;
147 };
148
149 struct mlx5e_tc_flow_parse_attr {
150         const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS];
151         struct net_device *filter_dev;
152         struct mlx5_flow_spec spec;
153         struct mlx5e_tc_mod_hdr_acts mod_hdr_acts;
154         int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS];
155         struct ethhdr eth;
156 };
157
158 #define MLX5E_TC_TABLE_NUM_GROUPS 4
159 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18)
160
161 struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
162         [CHAIN_TO_REG] = {
163                 .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
164                 .moffset = 0,
165                 .mlen = 2,
166         },
167         [TUNNEL_TO_REG] = {
168                 .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,
169                 .moffset = 1,
170                 .mlen = 3,
171                 .soffset = MLX5_BYTE_OFF(fte_match_param,
172                                          misc_parameters_2.metadata_reg_c_1),
173         },
174         [ZONE_TO_REG] = zone_to_reg_ct,
175         [ZONE_RESTORE_TO_REG] = zone_restore_to_reg_ct,
176         [CTSTATE_TO_REG] = ctstate_to_reg_ct,
177         [MARK_TO_REG] = mark_to_reg_ct,
178         [LABELS_TO_REG] = labels_to_reg_ct,
179         [FTEID_TO_REG] = fteid_to_reg_ct,
180         /* For NIC rules we store the restore metadata directly
181          * into reg_b that is passed to SW since we don't
182          * jump between steering domains.
183          */
184         [NIC_CHAIN_TO_REG] = {
185                 .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B,
186                 .moffset = 0,
187                 .mlen = 2,
188         },
189         [NIC_ZONE_RESTORE_TO_REG] = nic_zone_restore_to_reg_ct,
190 };
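/* Reading the table above (informal summary, inferred from the set/match
 * helpers below): mfield selects the metadata register written by the
 * modify-header action, moffset and mlen are the byte offset and byte width
 * of the value inside that register, and soffset is the byte offset of the
 * corresponding match field within fte_match_param, consumed by
 * mlx5e_tc_match_to_reg_match().
 */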
191
192 static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow);
193
194 void
195 mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec,
196                             enum mlx5e_tc_attr_to_reg type,
197                             u32 data,
198                             u32 mask)
199 {
200         int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
201         int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
202         void *headers_c = spec->match_criteria;
203         void *headers_v = spec->match_value;
204         void *fmask, *fval;
205
206         fmask = headers_c + soffset;
207         fval = headers_v + soffset;
208
209         mask = (__force u32)(cpu_to_be32(mask)) >> (32 - (match_len * 8));
210         data = (__force u32)(cpu_to_be32(data)) >> (32 - (match_len * 8));
211
212         memcpy(fmask, &mask, match_len);
213         memcpy(fval, &data, match_len);
214
215         spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
216 }
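/* Usage sketch (hypothetical values): match packets whose tunnel mapping id
 * was previously stored in reg_c_1 by an earlier table:
 *
 *	mlx5e_tc_match_to_reg_match(spec, TUNNEL_TO_REG, tun_id, 0xFFFFFF);
 *
 * TUNNEL_TO_REG has mlen 3, so only the low 24 bits of data and mask are
 * written into the misc_parameters_2 match fields.
 */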
217
218 void
219 mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec,
220                                 enum mlx5e_tc_attr_to_reg type,
221                                 u32 *data,
222                                 u32 *mask)
223 {
224         int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
225         int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
226         void *headers_c = spec->match_criteria;
227         void *headers_v = spec->match_value;
228         void *fmask, *fval;
229
230         fmask = headers_c + soffset;
231         fval = headers_v + soffset;
232
233         memcpy(mask, fmask, match_len);
234         memcpy(data, fval, match_len);
235
236         *mask = be32_to_cpu((__force __be32)(*mask << (32 - (match_len * 8))));
237         *data = be32_to_cpu((__force __be32)(*data << (32 - (match_len * 8))));
238 }
239
240 int
241 mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
242                           struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
243                           enum mlx5_flow_namespace_type ns,
244                           enum mlx5e_tc_attr_to_reg type,
245                           u32 data)
246 {
247         int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
248         int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
249         int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
250         char *modact;
251         int err;
252
253         err = alloc_mod_hdr_actions(mdev, ns, mod_hdr_acts);
254         if (err)
255                 return err;
256
257         modact = mod_hdr_acts->actions +
258                  (mod_hdr_acts->num_actions * MLX5_MH_ACT_SZ);
259
260         /* Firmware has a 5-bit length field where 0 means 32 bits */
261         if (mlen == 4)
262                 mlen = 0;
263
264         MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
265         MLX5_SET(set_action_in, modact, field, mfield);
266         MLX5_SET(set_action_in, modact, offset, moffset * 8);
267         MLX5_SET(set_action_in, modact, length, mlen * 8);
268         MLX5_SET(set_action_in, modact, data, data);
269         mod_hdr_acts->num_actions++;
270
271         return 0;
272 }
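/* Usage sketch (hypothetical call site): append a modify-header action that
 * stores the destination chain id in reg_c_0 before jumping to another table:
 *
 *	err = mlx5e_tc_match_to_reg_set(priv->mdev, &parse_attr->mod_hdr_acts,
 *					MLX5_FLOW_NAMESPACE_FDB,
 *					CHAIN_TO_REG, chain_id);
 *
 * With CHAIN_TO_REG (moffset 0, mlen 2) this becomes a 16-bit set_action_in
 * on MLX5_ACTION_IN_FIELD_METADATA_REG_C_0.
 */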
273
274 static struct mlx5_tc_ct_priv *
275 get_ct_priv(struct mlx5e_priv *priv)
276 {
277         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
278         struct mlx5_rep_uplink_priv *uplink_priv;
279         struct mlx5e_rep_priv *uplink_rpriv;
280
281         if (is_mdev_switchdev_mode(priv->mdev)) {
282                 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
283                 uplink_priv = &uplink_rpriv->uplink_priv;
284
285                 return uplink_priv->ct_priv;
286         }
287
288         return priv->fs.tc.ct;
289 }
290
291 struct mlx5_flow_handle *
292 mlx5_tc_rule_insert(struct mlx5e_priv *priv,
293                     struct mlx5_flow_spec *spec,
294                     struct mlx5_flow_attr *attr)
295 {
296         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
297
298         if (is_mdev_switchdev_mode(priv->mdev))
299                 return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
300
301         return  mlx5e_add_offloaded_nic_rule(priv, spec, attr);
302 }
303
304 void
305 mlx5_tc_rule_delete(struct mlx5e_priv *priv,
306                     struct mlx5_flow_handle *rule,
307                     struct mlx5_flow_attr *attr)
308 {
309         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
310
311         if (is_mdev_switchdev_mode(priv->mdev)) {
312                 mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
313
314                 return;
315         }
316
317         mlx5e_del_offloaded_nic_rule(priv, rule, attr);
318 }
319
320 struct mlx5e_hairpin {
321         struct mlx5_hairpin *pair;
322
323         struct mlx5_core_dev *func_mdev;
324         struct mlx5e_priv *func_priv;
325         u32 tdn;
326         u32 tirn;
327
328         int num_channels;
329         struct mlx5e_rqt indir_rqt;
330         u32 indir_tirn[MLX5E_NUM_INDIR_TIRS];
331         struct mlx5e_ttc_table ttc;
332 };
333
334 struct mlx5e_hairpin_entry {
335         /* a node of a hash table which keeps all the hairpin entries */
336         struct hlist_node hairpin_hlist;
337
338         /* protects flows list */
339         spinlock_t flows_lock;
340         /* flows sharing the same hairpin */
341         struct list_head flows;
342         /* hpe's that were not fully initialized when the dead peer update event
343          * function traversed them.
344          */
345         struct list_head dead_peer_wait_list;
346
347         u16 peer_vhca_id;
348         u8 prio;
349         struct mlx5e_hairpin *hp;
350         refcount_t refcnt;
351         struct completion res_ready;
352 };
353
354 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
355                               struct mlx5e_tc_flow *flow);
356
357 static struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
358 {
359         if (!flow || !refcount_inc_not_zero(&flow->refcnt))
360                 return ERR_PTR(-EINVAL);
361         return flow;
362 }
363
364 static void mlx5e_flow_put(struct mlx5e_priv *priv,
365                            struct mlx5e_tc_flow *flow)
366 {
367         if (refcount_dec_and_test(&flow->refcnt)) {
368                 mlx5e_tc_del_flow(priv, flow);
369                 kfree_rcu(flow, rcu_head);
370         }
371 }
372
373 static void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag)
374 {
375         /* Complete all memory stores before setting bit. */
376         smp_mb__before_atomic();
377         set_bit(flag, &flow->flags);
378 }
379
380 #define flow_flag_set(flow, flag) __flow_flag_set(flow, MLX5E_TC_FLOW_FLAG_##flag)
381
382 static bool __flow_flag_test_and_set(struct mlx5e_tc_flow *flow,
383                                      unsigned long flag)
384 {
385         /* test_and_set_bit() provides all necessary barriers */
386         return test_and_set_bit(flag, &flow->flags);
387 }
388
389 #define flow_flag_test_and_set(flow, flag)                      \
390         __flow_flag_test_and_set(flow,                          \
391                                  MLX5E_TC_FLOW_FLAG_##flag)
392
393 static void __flow_flag_clear(struct mlx5e_tc_flow *flow, unsigned long flag)
394 {
395         /* Complete all memory stores before clearing bit. */
396         smp_mb__before_atomic();
397         clear_bit(flag, &flow->flags);
398 }
399
400 #define flow_flag_clear(flow, flag) __flow_flag_clear(flow, \
401                                                       MLX5E_TC_FLOW_FLAG_##flag)
402
403 static bool __flow_flag_test(struct mlx5e_tc_flow *flow, unsigned long flag)
404 {
405         bool ret = test_bit(flag, &flow->flags);
406
407         /* Read fields of flow structure only after checking flags. */
408         smp_mb__after_atomic();
409         return ret;
410 }
411
412 #define flow_flag_test(flow, flag) __flow_flag_test(flow, \
413                                                     MLX5E_TC_FLOW_FLAG_##flag)
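/* Call sites use the short flag names, e.g. (illustrative):
 *
 *	flow_flag_set(flow, OFFLOADED);
 *	if (flow_flag_test(flow, HAIRPIN))
 *		mlx5e_hairpin_flow_del(priv, flow);
 *
 * which expand to atomic bit operations on MLX5E_TC_FLOW_FLAG_OFFLOADED and
 * MLX5E_TC_FLOW_FLAG_HAIRPIN within flow->flags.
 */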
414
415 bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
416 {
417         return flow_flag_test(flow, ESWITCH);
418 }
419
420 static bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow)
421 {
422         return flow_flag_test(flow, FT);
423 }
424
425 static bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
426 {
427         return flow_flag_test(flow, OFFLOADED);
428 }
429
430 static int get_flow_name_space(struct mlx5e_tc_flow *flow)
431 {
432         return mlx5e_is_eswitch_flow(flow) ?
433                 MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL;
434 }
435
436 static struct mod_hdr_tbl *
437 get_mod_hdr_table(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
438 {
439         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
440
441         return get_flow_name_space(flow) == MLX5_FLOW_NAMESPACE_FDB ?
442                 &esw->offloads.mod_hdr :
443                 &priv->fs.tc.mod_hdr;
444 }
445
446 static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
447                                 struct mlx5e_tc_flow *flow,
448                                 struct mlx5e_tc_flow_parse_attr *parse_attr)
449 {
450         struct mlx5_modify_hdr *modify_hdr;
451         struct mlx5e_mod_hdr_handle *mh;
452
453         mh = mlx5e_mod_hdr_attach(priv->mdev, get_mod_hdr_table(priv, flow),
454                                   get_flow_name_space(flow),
455                                   &parse_attr->mod_hdr_acts);
456         if (IS_ERR(mh))
457                 return PTR_ERR(mh);
458
459         modify_hdr = mlx5e_mod_hdr_get(mh);
460         flow->attr->modify_hdr = modify_hdr;
461         flow->mh = mh;
462
463         return 0;
464 }
465
466 static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv,
467                                  struct mlx5e_tc_flow *flow)
468 {
469         /* flow wasn't fully initialized */
470         if (!flow->mh)
471                 return;
472
473         mlx5e_mod_hdr_detach(priv->mdev, get_mod_hdr_table(priv, flow),
474                              flow->mh);
475         flow->mh = NULL;
476 }
477
478 static
479 struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex)
480 {
481         struct net_device *netdev;
482         struct mlx5e_priv *priv;
483
484         netdev = __dev_get_by_index(net, ifindex);
485         priv = netdev_priv(netdev);
486         return priv->mdev;
487 }
488
489 static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp)
490 {
491         u32 in[MLX5_ST_SZ_DW(create_tir_in)] = {};
492         void *tirc;
493         int err;
494
495         err = mlx5_core_alloc_transport_domain(hp->func_mdev, &hp->tdn);
496         if (err)
497                 goto alloc_tdn_err;
498
499         tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
500
501         MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
502         MLX5_SET(tirc, tirc, inline_rqn, hp->pair->rqn[0]);
503         MLX5_SET(tirc, tirc, transport_domain, hp->tdn);
504
505         err = mlx5_core_create_tir(hp->func_mdev, in, &hp->tirn);
506         if (err)
507                 goto create_tir_err;
508
509         return 0;
510
511 create_tir_err:
512         mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
513 alloc_tdn_err:
514         return err;
515 }
516
517 static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp)
518 {
519         mlx5_core_destroy_tir(hp->func_mdev, hp->tirn);
520         mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
521 }
522
523 static void mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc)
524 {
525         u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE], rqn;
526         struct mlx5e_priv *priv = hp->func_priv;
527         int i, ix, sz = MLX5E_INDIR_RQT_SIZE;
528
529         mlx5e_build_default_indir_rqt(indirection_rqt, sz,
530                                       hp->num_channels);
531
532         for (i = 0; i < sz; i++) {
533                 ix = i;
534                 if (priv->rss_params.hfunc == ETH_RSS_HASH_XOR)
535                         ix = mlx5e_bits_invert(i, ilog2(sz));
536                 ix = indirection_rqt[ix];
537                 rqn = hp->pair->rqn[ix];
538                 MLX5_SET(rqtc, rqtc, rq_num[i], rqn);
539         }
540 }
541
542 static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
543 {
544         int inlen, err, sz = MLX5E_INDIR_RQT_SIZE;
545         struct mlx5e_priv *priv = hp->func_priv;
546         struct mlx5_core_dev *mdev = priv->mdev;
547         void *rqtc;
548         u32 *in;
549
550         inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
551         in = kvzalloc(inlen, GFP_KERNEL);
552         if (!in)
553                 return -ENOMEM;
554
555         rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
556
557         MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
558         MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
559
560         mlx5e_hairpin_fill_rqt_rqns(hp, rqtc);
561
562         err = mlx5_core_create_rqt(mdev, in, inlen, &hp->indir_rqt.rqtn);
563         if (!err)
564                 hp->indir_rqt.enabled = true;
565
566         kvfree(in);
567         return err;
568 }
569
570 static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
571 {
572         struct mlx5e_priv *priv = hp->func_priv;
573         u32 in[MLX5_ST_SZ_DW(create_tir_in)];
574         int tt, i, err;
575         void *tirc;
576
577         for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
578                 struct mlx5e_tirc_config ttconfig = mlx5e_tirc_get_default_config(tt);
579
580                 memset(in, 0, MLX5_ST_SZ_BYTES(create_tir_in));
581                 tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
582
583                 MLX5_SET(tirc, tirc, transport_domain, hp->tdn);
584                 MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
585                 MLX5_SET(tirc, tirc, indirect_table, hp->indir_rqt.rqtn);
586                 mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, &ttconfig, tirc, false);
587
588                 err = mlx5_core_create_tir(hp->func_mdev, in,
589                                            &hp->indir_tirn[tt]);
590                 if (err) {
591                         mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err);
592                         goto err_destroy_tirs;
593                 }
594         }
595         return 0;
596
597 err_destroy_tirs:
598         for (i = 0; i < tt; i++)
599                 mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[i]);
600         return err;
601 }
602
603 static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp)
604 {
605         int tt;
606
607         for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
608                 mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[tt]);
609 }
610
611 static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp,
612                                          struct ttc_params *ttc_params)
613 {
614         struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
615         int tt;
616
617         memset(ttc_params, 0, sizeof(*ttc_params));
618
619         ttc_params->any_tt_tirn = hp->tirn;
620
621         for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
622                 ttc_params->indir_tirn[tt] = hp->indir_tirn[tt];
623
624         ft_attr->max_fte = MLX5E_TTC_TABLE_SIZE;
625         ft_attr->level = MLX5E_TC_TTC_FT_LEVEL;
626         ft_attr->prio = MLX5E_TC_PRIO;
627 }
628
629 static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp)
630 {
631         struct mlx5e_priv *priv = hp->func_priv;
632         struct ttc_params ttc_params;
633         int err;
634
635         err = mlx5e_hairpin_create_indirect_rqt(hp);
636         if (err)
637                 return err;
638
639         err = mlx5e_hairpin_create_indirect_tirs(hp);
640         if (err)
641                 goto err_create_indirect_tirs;
642
643         mlx5e_hairpin_set_ttc_params(hp, &ttc_params);
644         err = mlx5e_create_ttc_table(priv, &ttc_params, &hp->ttc);
645         if (err)
646                 goto err_create_ttc_table;
647
648         netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n",
649                    hp->num_channels, hp->ttc.ft.t->id);
650
651         return 0;
652
653 err_create_ttc_table:
654         mlx5e_hairpin_destroy_indirect_tirs(hp);
655 err_create_indirect_tirs:
656         mlx5e_destroy_rqt(priv, &hp->indir_rqt);
657
658         return err;
659 }
660
661 static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp)
662 {
663         struct mlx5e_priv *priv = hp->func_priv;
664
665         mlx5e_destroy_ttc_table(priv, &hp->ttc);
666         mlx5e_hairpin_destroy_indirect_tirs(hp);
667         mlx5e_destroy_rqt(priv, &hp->indir_rqt);
668 }
669
670 static struct mlx5e_hairpin *
671 mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params,
672                      int peer_ifindex)
673 {
674         struct mlx5_core_dev *func_mdev, *peer_mdev;
675         struct mlx5e_hairpin *hp;
676         struct mlx5_hairpin *pair;
677         int err;
678
679         hp = kzalloc(sizeof(*hp), GFP_KERNEL);
680         if (!hp)
681                 return ERR_PTR(-ENOMEM);
682
683         func_mdev = priv->mdev;
684         peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
685
686         pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params);
687         if (IS_ERR(pair)) {
688                 err = PTR_ERR(pair);
689                 goto create_pair_err;
690         }
691         hp->pair = pair;
692         hp->func_mdev = func_mdev;
693         hp->func_priv = priv;
694         hp->num_channels = params->num_channels;
695
696         err = mlx5e_hairpin_create_transport(hp);
697         if (err)
698                 goto create_transport_err;
699
700         if (hp->num_channels > 1) {
701                 err = mlx5e_hairpin_rss_init(hp);
702                 if (err)
703                         goto rss_init_err;
704         }
705
706         return hp;
707
708 rss_init_err:
709         mlx5e_hairpin_destroy_transport(hp);
710 create_transport_err:
711         mlx5_core_hairpin_destroy(hp->pair);
712 create_pair_err:
713         kfree(hp);
714         return ERR_PTR(err);
715 }
716
717 static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp)
718 {
719         if (hp->num_channels > 1)
720                 mlx5e_hairpin_rss_cleanup(hp);
721         mlx5e_hairpin_destroy_transport(hp);
722         mlx5_core_hairpin_destroy(hp->pair);
723         kvfree(hp);
724 }
725
726 static inline u32 hash_hairpin_info(u16 peer_vhca_id, u8 prio)
727 {
728         return (peer_vhca_id << 16 | prio);
729 }
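/* The key packs the peer vhca id into the upper 16 bits and the prio into
 * the low bits, e.g. (illustrative) peer_vhca_id 0x12 and prio 3 hash to
 * key 0x120003.
 */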
730
731 static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv,
732                                                      u16 peer_vhca_id, u8 prio)
733 {
734         struct mlx5e_hairpin_entry *hpe;
735         u32 hash_key = hash_hairpin_info(peer_vhca_id, prio);
736
737         hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe,
738                                hairpin_hlist, hash_key) {
739                 if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) {
740                         refcount_inc(&hpe->refcnt);
741                         return hpe;
742                 }
743         }
744
745         return NULL;
746 }
747
748 static void mlx5e_hairpin_put(struct mlx5e_priv *priv,
749                               struct mlx5e_hairpin_entry *hpe)
750 {
751         /* no more hairpin flows for us, release the hairpin pair */
752         if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &priv->fs.tc.hairpin_tbl_lock))
753                 return;
754         hash_del(&hpe->hairpin_hlist);
755         mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
756
757         if (!IS_ERR_OR_NULL(hpe->hp)) {
758                 netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
759                            dev_name(hpe->hp->pair->peer_mdev->device));
760
761                 mlx5e_hairpin_destroy(hpe->hp);
762         }
763
764         WARN_ON(!list_empty(&hpe->flows));
765         kfree(hpe);
766 }
767
768 #define UNKNOWN_MATCH_PRIO 8
769
770 static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv,
771                                   struct mlx5_flow_spec *spec, u8 *match_prio,
772                                   struct netlink_ext_ack *extack)
773 {
774         void *headers_c, *headers_v;
775         u8 prio_val, prio_mask = 0;
776         bool vlan_present;
777
778 #ifdef CONFIG_MLX5_CORE_EN_DCB
779         if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) {
780                 NL_SET_ERR_MSG_MOD(extack,
781                                    "only PCP trust state supported for hairpin");
782                 return -EOPNOTSUPP;
783         }
784 #endif
785         headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
786         headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
787
788         vlan_present = MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag);
789         if (vlan_present) {
790                 prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
791                 prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
792         }
793
794         if (!vlan_present || !prio_mask) {
795                 prio_val = UNKNOWN_MATCH_PRIO;
796         } else if (prio_mask != 0x7) {
797                 NL_SET_ERR_MSG_MOD(extack,
798                                    "masked priority match not supported for hairpin");
799                 return -EOPNOTSUPP;
800         }
801
802         *match_prio = prio_val;
803         return 0;
804 }
805
806 static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
807                                   struct mlx5e_tc_flow *flow,
808                                   struct mlx5e_tc_flow_parse_attr *parse_attr,
809                                   struct netlink_ext_ack *extack)
810 {
811         int peer_ifindex = parse_attr->mirred_ifindex[0];
812         struct mlx5_hairpin_params params;
813         struct mlx5_core_dev *peer_mdev;
814         struct mlx5e_hairpin_entry *hpe;
815         struct mlx5e_hairpin *hp;
816         u64 link_speed64;
817         u32 link_speed;
818         u8 match_prio;
819         u16 peer_id;
820         int err;
821
822         peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
823         if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) {
824                 NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported");
825                 return -EOPNOTSUPP;
826         }
827
828         peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
829         err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio,
830                                      extack);
831         if (err)
832                 return err;
833
834         mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
835         hpe = mlx5e_hairpin_get(priv, peer_id, match_prio);
836         if (hpe) {
837                 mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
838                 wait_for_completion(&hpe->res_ready);
839
840                 if (IS_ERR(hpe->hp)) {
841                         err = -EREMOTEIO;
842                         goto out_err;
843                 }
844                 goto attach_flow;
845         }
846
847         hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
848         if (!hpe) {
849                 mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
850                 return -ENOMEM;
851         }
852
853         spin_lock_init(&hpe->flows_lock);
854         INIT_LIST_HEAD(&hpe->flows);
855         INIT_LIST_HEAD(&hpe->dead_peer_wait_list);
856         hpe->peer_vhca_id = peer_id;
857         hpe->prio = match_prio;
858         refcount_set(&hpe->refcnt, 1);
859         init_completion(&hpe->res_ready);
860
861         hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist,
862                  hash_hairpin_info(peer_id, match_prio));
863         mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
864
865         params.log_data_size = 15;
866         params.log_data_size = min_t(u8, params.log_data_size,
867                                      MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz));
868         params.log_data_size = max_t(u8, params.log_data_size,
869                                      MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz));
870
871         params.log_num_packets = params.log_data_size -
872                                  MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv->mdev);
873         params.log_num_packets = min_t(u8, params.log_num_packets,
874                                        MLX5_CAP_GEN(priv->mdev, log_max_hairpin_num_packets));
875
876         params.q_counter = priv->q_counter;
877         /* set a hairpin pair for each 50 Gbps share of the link */
878         mlx5e_port_max_linkspeed(priv->mdev, &link_speed);
879         link_speed = max_t(u32, link_speed, 50000);
880         link_speed64 = link_speed;
881         do_div(link_speed64, 50000);
882         params.num_channels = link_speed64;
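	/* Illustrative arithmetic (assuming mlx5e_port_max_linkspeed() reports
	 * the speed in Mbps): a 100 Gbps port gives link_speed = 100000, so
	 * link_speed64 / 50000 = 2 and the hairpin is created with two
	 * channels; num_channels > 1 then enables hairpin RSS below.
	 */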
883
884         hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
885         hpe->hp = hp;
886         complete_all(&hpe->res_ready);
887         if (IS_ERR(hp)) {
888                 err = PTR_ERR(hp);
889                 goto out_err;
890         }
891
892         netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n",
893                    hp->tirn, hp->pair->rqn[0],
894                    dev_name(hp->pair->peer_mdev->device),
895                    hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets);
896
897 attach_flow:
898         if (hpe->hp->num_channels > 1) {
899                 flow_flag_set(flow, HAIRPIN_RSS);
900                 flow->attr->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t;
901         } else {
902                 flow->attr->nic_attr->hairpin_tirn = hpe->hp->tirn;
903         }
904
905         flow->hpe = hpe;
906         spin_lock(&hpe->flows_lock);
907         list_add(&flow->hairpin, &hpe->flows);
908         spin_unlock(&hpe->flows_lock);
909
910         return 0;
911
912 out_err:
913         mlx5e_hairpin_put(priv, hpe);
914         return err;
915 }
916
917 static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
918                                    struct mlx5e_tc_flow *flow)
919 {
920         /* flow wasn't fully initialized */
921         if (!flow->hpe)
922                 return;
923
924         spin_lock(&flow->hpe->flows_lock);
925         list_del(&flow->hairpin);
926         spin_unlock(&flow->hpe->flows_lock);
927
928         mlx5e_hairpin_put(priv, flow->hpe);
929         flow->hpe = NULL;
930 }
931
932 struct mlx5_flow_handle *
933 mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv,
934                              struct mlx5_flow_spec *spec,
935                              struct mlx5_flow_attr *attr)
936 {
937         struct mlx5_flow_context *flow_context = &spec->flow_context;
938         struct mlx5_fs_chains *nic_chains = nic_chains(priv);
939         struct mlx5_nic_flow_attr *nic_attr = attr->nic_attr;
940         struct mlx5e_tc_table *tc = &priv->fs.tc;
941         struct mlx5_flow_destination dest[2] = {};
942         struct mlx5_flow_act flow_act = {
943                 .action = attr->action,
944                 .flags    = FLOW_ACT_NO_APPEND,
945         };
946         struct mlx5_flow_handle *rule;
947         struct mlx5_flow_table *ft;
948         int dest_ix = 0;
949
950         flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
951         flow_context->flow_tag = nic_attr->flow_tag;
952
953         if (attr->dest_ft) {
954                 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
955                 dest[dest_ix].ft = attr->dest_ft;
956                 dest_ix++;
957         } else if (nic_attr->hairpin_ft) {
958                 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
959                 dest[dest_ix].ft = nic_attr->hairpin_ft;
960                 dest_ix++;
961         } else if (nic_attr->hairpin_tirn) {
962                 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
963                 dest[dest_ix].tir_num = nic_attr->hairpin_tirn;
964                 dest_ix++;
965         } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
966                 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
967                 if (attr->dest_chain) {
968                         dest[dest_ix].ft = mlx5_chains_get_table(nic_chains,
969                                                                  attr->dest_chain, 1,
970                                                                  MLX5E_TC_FT_LEVEL);
971                         if (IS_ERR(dest[dest_ix].ft))
972                                 return ERR_CAST(dest[dest_ix].ft);
973                 } else {
974                         dest[dest_ix].ft = priv->fs.vlan.ft.t;
975                 }
976                 dest_ix++;
977         }
978
979         if (dest[0].type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
980             MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
981                 flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
982
983         if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
984                 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
985                 dest[dest_ix].counter_id = mlx5_fc_id(attr->counter);
986                 dest_ix++;
987         }
988
989         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
990                 flow_act.modify_hdr = attr->modify_hdr;
991
992         mutex_lock(&tc->t_lock);
993         if (IS_ERR_OR_NULL(tc->t)) {
994                 /* Create the root table here if it doesn't exist yet */
995                 tc->t =
996                         mlx5_chains_get_table(nic_chains, 0, 1, MLX5E_TC_FT_LEVEL);
997
998                 if (IS_ERR(tc->t)) {
999                         mutex_unlock(&tc->t_lock);
1000                         netdev_err(priv->netdev,
1001                                    "Failed to create tc offload table\n");
1002                         rule = ERR_CAST(priv->fs.tc.t);
1003                         goto err_ft_get;
1004                 }
1005         }
1006         mutex_unlock(&tc->t_lock);
1007
1008         if (attr->chain || attr->prio)
1009                 ft = mlx5_chains_get_table(nic_chains,
1010                                            attr->chain, attr->prio,
1011                                            MLX5E_TC_FT_LEVEL);
1012         else
1013                 ft = attr->ft;
1014
1015         if (IS_ERR(ft)) {
1016                 rule = ERR_CAST(ft);
1017                 goto err_ft_get;
1018         }
1019
1020         if (attr->outer_match_level != MLX5_MATCH_NONE)
1021                 spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
1022
1023         rule = mlx5_add_flow_rules(ft, spec,
1024                                    &flow_act, dest, dest_ix);
1025         if (IS_ERR(rule))
1026                 goto err_rule;
1027
1028         return rule;
1029
1030 err_rule:
1031         if (attr->chain || attr->prio)
1032                 mlx5_chains_put_table(nic_chains,
1033                                       attr->chain, attr->prio,
1034                                       MLX5E_TC_FT_LEVEL);
1035 err_ft_get:
1036         if (attr->dest_chain)
1037                 mlx5_chains_put_table(nic_chains,
1038                                       attr->dest_chain, 1,
1039                                       MLX5E_TC_FT_LEVEL);
1040
1041         return ERR_CAST(rule);
1042 }
1043
1044 static int
1045 mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
1046                       struct mlx5e_tc_flow_parse_attr *parse_attr,
1047                       struct mlx5e_tc_flow *flow,
1048                       struct netlink_ext_ack *extack)
1049 {
1050         struct mlx5_flow_attr *attr = flow->attr;
1051         struct mlx5_core_dev *dev = priv->mdev;
1052         struct mlx5_fc *counter = NULL;
1053         int err;
1054
1055         if (flow_flag_test(flow, HAIRPIN)) {
1056                 err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
1057                 if (err)
1058                         return err;
1059         }
1060
1061         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1062                 counter = mlx5_fc_create(dev, true);
1063                 if (IS_ERR(counter))
1064                         return PTR_ERR(counter);
1065
1066                 attr->counter = counter;
1067         }
1068
1069         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
1070                 err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
1071                 dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
1072                 if (err)
1073                         return err;
1074         }
1075
1076         if (flow_flag_test(flow, CT))
1077                 flow->rule[0] = mlx5_tc_ct_flow_offload(get_ct_priv(priv), flow, &parse_attr->spec,
1078                                                         attr, &parse_attr->mod_hdr_acts);
1079         else
1080                 flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec,
1081                                                              attr);
1082
1083         return PTR_ERR_OR_ZERO(flow->rule[0]);
1084 }
1085
1086 void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv,
1087                                   struct mlx5_flow_handle *rule,
1088                                   struct mlx5_flow_attr *attr)
1089 {
1090         struct mlx5_fs_chains *nic_chains = nic_chains(priv);
1091
1092         mlx5_del_flow_rules(rule);
1093
1094         if (attr->chain || attr->prio)
1095                 mlx5_chains_put_table(nic_chains, attr->chain, attr->prio,
1096                                       MLX5E_TC_FT_LEVEL);
1097
1098         if (attr->dest_chain)
1099                 mlx5_chains_put_table(nic_chains, attr->dest_chain, 1,
1100                                       MLX5E_TC_FT_LEVEL);
1101 }
1102
1103 static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
1104                                   struct mlx5e_tc_flow *flow)
1105 {
1106         struct mlx5_flow_attr *attr = flow->attr;
1107         struct mlx5e_tc_table *tc = &priv->fs.tc;
1108
1109         flow_flag_clear(flow, OFFLOADED);
1110
1111         if (flow_flag_test(flow, CT))
1112                 mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr);
1113         else if (!IS_ERR_OR_NULL(flow->rule[0]))
1114                 mlx5e_del_offloaded_nic_rule(priv, flow->rule[0], attr);
1115
1116         /* Remove root table if no rules are left to avoid
1117          * extra steering hops.
1118          */
1119         mutex_lock(&priv->fs.tc.t_lock);
1120         if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) &&
1121             !IS_ERR_OR_NULL(tc->t)) {
1122                 mlx5_chains_put_table(nic_chains(priv), 0, 1, MLX5E_TC_FT_LEVEL);
1123                 priv->fs.tc.t = NULL;
1124         }
1125         mutex_unlock(&priv->fs.tc.t_lock);
1126
1127         kvfree(attr->parse_attr);
1128
1129         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
1130                 mlx5e_detach_mod_hdr(priv, flow);
1131
1132         mlx5_fc_destroy(priv->mdev, attr->counter);
1133
1134         if (flow_flag_test(flow, HAIRPIN))
1135                 mlx5e_hairpin_flow_del(priv, flow);
1136
1137         kfree(flow->attr);
1138 }
1139
1140 static void mlx5e_detach_encap(struct mlx5e_priv *priv,
1141                                struct mlx5e_tc_flow *flow, int out_index);
1142
1143 static int mlx5e_attach_encap(struct mlx5e_priv *priv,
1144                               struct mlx5e_tc_flow *flow,
1145                               struct net_device *mirred_dev,
1146                               int out_index,
1147                               struct netlink_ext_ack *extack,
1148                               struct net_device **encap_dev,
1149                               bool *encap_valid);
1150 static int mlx5e_attach_decap(struct mlx5e_priv *priv,
1151                               struct mlx5e_tc_flow *flow,
1152                               struct netlink_ext_ack *extack);
1153 static void mlx5e_detach_decap(struct mlx5e_priv *priv,
1154                                struct mlx5e_tc_flow *flow);
1155
1156 static struct mlx5_flow_handle *
1157 mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
1158                            struct mlx5e_tc_flow *flow,
1159                            struct mlx5_flow_spec *spec,
1160                            struct mlx5_flow_attr *attr)
1161 {
1162         struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
1163         struct mlx5_flow_handle *rule;
1164
1165         if (flow_flag_test(flow, CT)) {
1166                 mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
1167
1168                 return mlx5_tc_ct_flow_offload(get_ct_priv(flow->priv),
1169                                                flow, spec, attr,
1170                                                mod_hdr_acts);
1171         }
1172
1173         rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
1174         if (IS_ERR(rule))
1175                 return rule;
1176
1177         if (attr->esw_attr->split_count) {
1178                 flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
1179                 if (IS_ERR(flow->rule[1])) {
1180                         mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
1181                         return flow->rule[1];
1182                 }
1183         }
1184
1185         return rule;
1186 }
1187
1188 static void
1189 mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
1190                              struct mlx5e_tc_flow *flow,
1191                              struct mlx5_flow_attr *attr)
1192 {
1193         flow_flag_clear(flow, OFFLOADED);
1194
1195         if (flow_flag_test(flow, CT)) {
1196                 mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr);
1197                 return;
1198         }
1199
1200         if (attr->esw_attr->split_count)
1201                 mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);
1202
1203         mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
1204 }
1205
1206 static struct mlx5_flow_handle *
1207 mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
1208                               struct mlx5e_tc_flow *flow,
1209                               struct mlx5_flow_spec *spec)
1210 {
1211         struct mlx5_flow_attr *slow_attr;
1212         struct mlx5_flow_handle *rule;
1213
1214         slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
1215         if (!slow_attr)
1216                 return ERR_PTR(-ENOMEM);
1217
1218         memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
1219         slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1220         slow_attr->esw_attr->split_count = 0;
1221         slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
1222
1223         rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
1224         if (!IS_ERR(rule))
1225                 flow_flag_set(flow, SLOW);
1226
1227         kfree(slow_attr);
1228
1229         return rule;
1230 }
1231
1232 static void
1233 mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
1234                                   struct mlx5e_tc_flow *flow)
1235 {
1236         struct mlx5_flow_attr *slow_attr;
1237
1238         slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
1239         if (!slow_attr) {
1240                 mlx5_core_warn(flow->priv->mdev, "Unable to alloc attr to unoffload slow path rule\n");
1241                 return;
1242         }
1243
1244         memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
1245         slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1246         slow_attr->esw_attr->split_count = 0;
1247         slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
1248         mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
1249         flow_flag_clear(flow, SLOW);
1250         kfree(slow_attr);
1251 }
1252
1253 /* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
1254  * function.
1255  */
1256 static void unready_flow_add(struct mlx5e_tc_flow *flow,
1257                              struct list_head *unready_flows)
1258 {
1259         flow_flag_set(flow, NOT_READY);
1260         list_add_tail(&flow->unready, unready_flows);
1261 }
1262
1263 /* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
1264  * function.
1265  */
1266 static void unready_flow_del(struct mlx5e_tc_flow *flow)
1267 {
1268         list_del(&flow->unready);
1269         flow_flag_clear(flow, NOT_READY);
1270 }
1271
1272 static void add_unready_flow(struct mlx5e_tc_flow *flow)
1273 {
1274         struct mlx5_rep_uplink_priv *uplink_priv;
1275         struct mlx5e_rep_priv *rpriv;
1276         struct mlx5_eswitch *esw;
1277
1278         esw = flow->priv->mdev->priv.eswitch;
1279         rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1280         uplink_priv = &rpriv->uplink_priv;
1281
1282         mutex_lock(&uplink_priv->unready_flows_lock);
1283         unready_flow_add(flow, &uplink_priv->unready_flows);
1284         mutex_unlock(&uplink_priv->unready_flows_lock);
1285 }
1286
1287 static void remove_unready_flow(struct mlx5e_tc_flow *flow)
1288 {
1289         struct mlx5_rep_uplink_priv *uplink_priv;
1290         struct mlx5e_rep_priv *rpriv;
1291         struct mlx5_eswitch *esw;
1292
1293         esw = flow->priv->mdev->priv.eswitch;
1294         rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1295         uplink_priv = &rpriv->uplink_priv;
1296
1297         mutex_lock(&uplink_priv->unready_flows_lock);
1298         unready_flow_del(flow);
1299         mutex_unlock(&uplink_priv->unready_flows_lock);
1300 }
1301
1302 static int
1303 mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
1304                       struct mlx5e_tc_flow *flow,
1305                       struct netlink_ext_ack *extack)
1306 {
1307         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1308         struct net_device *out_dev, *encap_dev = NULL;
1309         struct mlx5e_tc_flow_parse_attr *parse_attr;
1310         struct mlx5_flow_attr *attr = flow->attr;
1311         struct mlx5_esw_flow_attr *esw_attr;
1312         struct mlx5_fc *counter = NULL;
1313         struct mlx5e_rep_priv *rpriv;
1314         struct mlx5e_priv *out_priv;
1315         bool encap_valid = true;
1316         u32 max_prio, max_chain;
1317         int err = 0;
1318         int out_index;
1319
1320         if (!mlx5_chains_prios_supported(esw_chains(esw)) && attr->prio != 1) {
1321                 NL_SET_ERR_MSG_MOD(extack,
1322                                    "E-switch priorities unsupported, upgrade FW");
1323                 return -EOPNOTSUPP;
1324         }
1325
1326         /* We check chain range only for tc flows.
1327          * For ft flows, we checked attr->chain was originally 0 and set it to
1328          * FDB_FT_CHAIN which is outside tc range.
1329          * See mlx5e_rep_setup_ft_cb().
1330          */
1331         max_chain = mlx5_chains_get_chain_range(esw_chains(esw));
1332         if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) {
1333                 NL_SET_ERR_MSG_MOD(extack,
1334                                    "Requested chain is out of supported range");
1335                 return -EOPNOTSUPP;
1336         }
1337
1338         max_prio = mlx5_chains_get_prio_range(esw_chains(esw));
1339         if (attr->prio > max_prio) {
1340                 NL_SET_ERR_MSG_MOD(extack,
1341                                    "Requested priority is out of supported range");
1342                 return -EOPNOTSUPP;
1343         }
1344
1345         if (flow_flag_test(flow, L3_TO_L2_DECAP)) {
1346                 err = mlx5e_attach_decap(priv, flow, extack);
1347                 if (err)
1348                         return err;
1349         }
1350
1351         parse_attr = attr->parse_attr;
1352         esw_attr = attr->esw_attr;
1353
1354         for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
1355                 int mirred_ifindex;
1356
1357                 if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
1358                         continue;
1359
1360                 mirred_ifindex = parse_attr->mirred_ifindex[out_index];
1361                 out_dev = __dev_get_by_index(dev_net(priv->netdev),
1362                                              mirred_ifindex);
1363                 err = mlx5e_attach_encap(priv, flow, out_dev, out_index,
1364                                          extack, &encap_dev, &encap_valid);
1365                 if (err)
1366                         return err;
1367
1368                 out_priv = netdev_priv(encap_dev);
1369                 rpriv = out_priv->ppriv;
1370                 esw_attr->dests[out_index].rep = rpriv->rep;
1371                 esw_attr->dests[out_index].mdev = out_priv->mdev;
1372         }
1373
1374         err = mlx5_eswitch_add_vlan_action(esw, attr);
1375         if (err)
1376                 return err;
1377
1378         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
1379             !(attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR)) {
1380                 err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
1381                 dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
1382                 if (err)
1383                         return err;
1384         }
1385
1386         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1387                 counter = mlx5_fc_create(esw_attr->counter_dev, true);
1388                 if (IS_ERR(counter))
1389                         return PTR_ERR(counter);
1390
1391                 attr->counter = counter;
1392         }
1393
1394         /* we get here if one of the following takes place:
1395          * (1) there's no error
1396          * (2) there's an encap action and we don't have a valid neighbour
1397          */
1398         if (!encap_valid)
1399                 flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec);
1400         else
1401                 flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);
1402
1403         if (IS_ERR(flow->rule[0]))
1404                 return PTR_ERR(flow->rule[0]);
1405         else
1406                 flow_flag_set(flow, OFFLOADED);
1407
1408         return 0;
1409 }
1410
1411 static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
1412 {
1413         struct mlx5_flow_spec *spec = &flow->attr->parse_attr->spec;
1414         void *headers_v = MLX5_ADDR_OF(fte_match_param,
1415                                        spec->match_value,
1416                                        misc_parameters_3);
1417         u32 geneve_tlv_opt_0_data = MLX5_GET(fte_match_set_misc3,
1418                                              headers_v,
1419                                              geneve_tlv_option_0_data);
1420
1421         return !!geneve_tlv_opt_0_data;
1422 }
1423
1424 static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
1425                                   struct mlx5e_tc_flow *flow)
1426 {
1427         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1428         struct mlx5_flow_attr *attr = flow->attr;
1429         int out_index;
1430
1431         mlx5e_put_flow_tunnel_id(flow);
1432
1433         if (flow_flag_test(flow, NOT_READY))
1434                 remove_unready_flow(flow);
1435
1436         if (mlx5e_is_offloaded_flow(flow)) {
1437                 if (flow_flag_test(flow, SLOW))
1438                         mlx5e_tc_unoffload_from_slow_path(esw, flow);
1439                 else
1440                         mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
1441         }
1442
1443         if (mlx5_flow_has_geneve_opt(flow))
1444                 mlx5_geneve_tlv_option_del(priv->mdev->geneve);
1445
1446         mlx5_eswitch_del_vlan_action(esw, attr);
1447
1448         for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
1449                 if (attr->esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) {
1450                         mlx5e_detach_encap(priv, flow, out_index);
1451                         kfree(attr->parse_attr->tun_info[out_index]);
1452                 }
1453         kvfree(attr->parse_attr);
1454
1455         mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr);
1456
1457         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
1458                 mlx5e_detach_mod_hdr(priv, flow);
1459
1460         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
1461                 mlx5_fc_destroy(attr->esw_attr->counter_dev, attr->counter);
1462
1463         if (flow_flag_test(flow, L3_TO_L2_DECAP))
1464                 mlx5e_detach_decap(priv, flow);
1465
1466         kfree(flow->attr);
1467 }
1468
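/* The neighbour of this encap entry became valid: offload the cached
 * encapsulation header and move every flow whose encap destinations are all
 * valid from its slow path rule to the regular FDB (encap) rule.
 */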
1469 void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
1470                               struct mlx5e_encap_entry *e,
1471                               struct list_head *flow_list)
1472 {
1473         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1474         struct mlx5_esw_flow_attr *esw_attr;
1475         struct mlx5_flow_handle *rule;
1476         struct mlx5_flow_attr *attr;
1477         struct mlx5_flow_spec *spec;
1478         struct mlx5e_tc_flow *flow;
1479         int err;
1480
1481         e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
1482                                                      e->reformat_type,
1483                                                      e->encap_size, e->encap_header,
1484                                                      MLX5_FLOW_NAMESPACE_FDB);
1485         if (IS_ERR(e->pkt_reformat)) {
1486                 mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
1487                                PTR_ERR(e->pkt_reformat));
1488                 return;
1489         }
1490         e->flags |= MLX5_ENCAP_ENTRY_VALID;
1491         mlx5e_rep_queue_neigh_stats_work(priv);
1492
1493         list_for_each_entry(flow, flow_list, tmp_list) {
1494                 bool all_flow_encaps_valid = true;
1495                 int i;
1496
1497                 if (!mlx5e_is_offloaded_flow(flow))
1498                         continue;
1499                 attr = flow->attr;
1500                 esw_attr = attr->esw_attr;
1501                 spec = &attr->parse_attr->spec;
1502
1503                 esw_attr->dests[flow->tmp_efi_index].pkt_reformat = e->pkt_reformat;
1504                 esw_attr->dests[flow->tmp_efi_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
1505                 /* Flow can be associated with multiple encap entries.
1506                  * Before offloading the flow verify that all of them have
1507                  * a valid neighbour.
1508                  */
1509                 for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
1510                         if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
1511                                 continue;
1512                         if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
1513                                 all_flow_encaps_valid = false;
1514                                 break;
1515                         }
1516                 }
1517                 /* Do not offload flows with unresolved neighbors */
1518                 if (!all_flow_encaps_valid)
1519                         continue;
1520                 /* update from slow path rule to encap rule */
1521                 rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
1522                 if (IS_ERR(rule)) {
1523                         err = PTR_ERR(rule);
1524                         mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
1525                                        err);
1526                         continue;
1527                 }
1528
1529                 mlx5e_tc_unoffload_from_slow_path(esw, flow);
1530                 flow->rule[0] = rule;
1531                 /* was unset when slow path rule removed */
1532                 flow_flag_set(flow, OFFLOADED);
1533         }
1534 }
1535
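/* The neighbour of this encap entry became invalid: mark the corresponding
 * encap destination of each offloaded flow as invalid, fall back to the slow
 * path rule and release the offloaded packet reformat object.
 */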
1536 void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
1537                               struct mlx5e_encap_entry *e,
1538                               struct list_head *flow_list)
1539 {
1540         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1541         struct mlx5_esw_flow_attr *esw_attr;
1542         struct mlx5_flow_handle *rule;
1543         struct mlx5_flow_attr *attr;
1544         struct mlx5_flow_spec *spec;
1545         struct mlx5e_tc_flow *flow;
1546         int err;
1547
1548         list_for_each_entry(flow, flow_list, tmp_list) {
1549                 if (!mlx5e_is_offloaded_flow(flow))
1550                         continue;
1551                 attr = flow->attr;
1552                 esw_attr = attr->esw_attr;
1553                 spec = &attr->parse_attr->spec;
1554
1555                 /* update from encap rule to slow path rule */
1556                 rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
1557                 /* mark the flow's encap dest as non-valid */
1558                 esw_attr->dests[flow->tmp_efi_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
1559
1560                 if (IS_ERR(rule)) {
1561                         err = PTR_ERR(rule);
1562                         mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
1563                                        err);
1564                         continue;
1565                 }
1566
1567                 mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
1568                 flow->rule[0] = rule;
1569                 /* was unset when fast path rule removed */
1570                 flow_flag_set(flow, OFFLOADED);
1571         }
1572
1573         /* the encap was valid until now - release its offloaded packet reformat object */
1574         e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
1575         mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
1576 }
1577
1578 static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
1579 {
1580         return flow->attr->counter;
1581 }
1582
1583 /* Takes reference to all flows attached to encap and adds the flows to
1584  * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
1585  */
1586 void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
1587 {
1588         struct encap_flow_item *efi;
1589         struct mlx5e_tc_flow *flow;
1590
1591         list_for_each_entry(efi, &e->flows, list) {
1592                 flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
1593                 if (IS_ERR(mlx5e_flow_get(flow)))
1594                         continue;
1595                 wait_for_completion(&flow->init_done);
1596
1597                 flow->tmp_efi_index = efi->index;
1598                 list_add(&flow->tmp_list, flow_list);
1599         }
1600 }
1601
1602 /* Put back the flow references taken by mlx5e_take_all_encap_flows(). */
1603 void mlx5e_put_encap_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list)
1604 {
1605         struct mlx5e_tc_flow *flow, *tmp;
1606
1607         list_for_each_entry_safe(flow, tmp, flow_list, tmp_list)
1608                 mlx5e_flow_put(priv, flow);
1609 }
1610
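/* Walk nhe->encap_list under RCU and take a reference on the next encap
 * entry that still has a non-zero refcount, waiting for it to finish
 * initialization and skipping it if it did not become valid. The reference
 * on the entry the walk started from is released.
 */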
1611 static struct mlx5e_encap_entry *
1612 mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
1613                            struct mlx5e_encap_entry *e)
1614 {
1615         struct mlx5e_encap_entry *next = NULL;
1616
1617 retry:
1618         rcu_read_lock();
1619
1620         /* find encap with non-zero reference counter value */
1621         for (next = e ?
1622                      list_next_or_null_rcu(&nhe->encap_list,
1623                                            &e->encap_list,
1624                                            struct mlx5e_encap_entry,
1625                                            encap_list) :
1626                      list_first_or_null_rcu(&nhe->encap_list,
1627                                             struct mlx5e_encap_entry,
1628                                             encap_list);
1629              next;
1630              next = list_next_or_null_rcu(&nhe->encap_list,
1631                                           &next->encap_list,
1632                                           struct mlx5e_encap_entry,
1633                                           encap_list))
1634                 if (mlx5e_encap_take(next))
1635                         break;
1636
1637         rcu_read_unlock();
1638
1639         /* release starting encap */
1640         if (e)
1641                 mlx5e_encap_put(netdev_priv(e->out_dev), e);
1642         if (!next)
1643                 return next;
1644
1645         /* wait for encap to be fully initialized */
1646         wait_for_completion(&next->res_ready);
1647         /* continue searching if encap entry is not in valid state after completion */
1648         if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) {
1649                 e = next;
1650                 goto retry;
1651         }
1652
1653         return next;
1654 }
1655
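/* Check whether any offloaded flow using this neighbour passed traffic since
 * the last report (based on the HW counter lastuse) and, if so, send a neigh
 * event so the neighbour entry is kept alive.
 */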
1656 void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
1657 {
1658         struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
1659         struct mlx5e_encap_entry *e = NULL;
1660         struct mlx5e_tc_flow *flow;
1661         struct mlx5_fc *counter;
1662         struct neigh_table *tbl;
1663         bool neigh_used = false;
1664         struct neighbour *n;
1665         u64 lastuse;
1666
1667         if (m_neigh->family == AF_INET)
1668                 tbl = &arp_tbl;
1669 #if IS_ENABLED(CONFIG_IPV6)
1670         else if (m_neigh->family == AF_INET6)
1671                 tbl = ipv6_stub->nd_tbl;
1672 #endif
1673         else
1674                 return;
1675
1676         /* mlx5e_get_next_valid_encap() releases previous encap before returning
1677          * next one.
1678          */
1679         while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
1680                 struct mlx5e_priv *priv = netdev_priv(e->out_dev);
1681                 struct encap_flow_item *efi, *tmp;
1682                 struct mlx5_eswitch *esw;
1683                 LIST_HEAD(flow_list);
1684
1685                 esw = priv->mdev->priv.eswitch;
1686                 mutex_lock(&esw->offloads.encap_tbl_lock);
1687                 list_for_each_entry_safe(efi, tmp, &e->flows, list) {
1688                         flow = container_of(efi, struct mlx5e_tc_flow,
1689                                             encaps[efi->index]);
1690                         if (IS_ERR(mlx5e_flow_get(flow)))
1691                                 continue;
1692                         list_add(&flow->tmp_list, &flow_list);
1693
1694                         if (mlx5e_is_offloaded_flow(flow)) {
1695                                 counter = mlx5e_tc_get_counter(flow);
1696                                 lastuse = mlx5_fc_query_lastuse(counter);
1697                                 if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
1698                                         neigh_used = true;
1699                                         break;
1700                                 }
1701                         }
1702                 }
1703                 mutex_unlock(&esw->offloads.encap_tbl_lock);
1704
1705                 mlx5e_put_encap_flow_list(priv, &flow_list);
1706                 if (neigh_used) {
1707                         /* release current encap before breaking the loop */
1708                         mlx5e_encap_put(priv, e);
1709                         break;
1710                 }
1711         }
1712
1713         trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);
1714
1715         if (neigh_used) {
1716                 nhe->reported_lastuse = jiffies;
1717
1718                 /* find the relevant neigh according to the cached device and
1719                  * dst ip pair
1720                  */
1721                 n = neigh_lookup(tbl, &m_neigh->dst_ip, m_neigh->dev);
1722                 if (!n)
1723                         return;
1724
1725                 neigh_event_send(n, NULL);
1726                 neigh_release(n);
1727         }
1728 }
1729
1730 static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
1731 {
1732         WARN_ON(!list_empty(&e->flows));
1733
1734         if (e->compl_result > 0) {
1735                 mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
1736
1737                 if (e->flags & MLX5_ENCAP_ENTRY_VALID)
1738                         mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
1739         }
1740
1741         kfree(e->tun_info);
1742         kfree(e->encap_header);
1743         kfree_rcu(e, rcu);
1744 }
1745
1746 static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
1747                                 struct mlx5e_decap_entry *d)
1748 {
1749         WARN_ON(!list_empty(&d->flows));
1750
1751         if (!d->compl_result)
1752                 mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);
1753
1754         kfree_rcu(d, rcu);
1755 }
1756
1757 void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
1758 {
1759         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1760
1761         if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
1762                 return;
1763         hash_del_rcu(&e->encap_hlist);
1764         mutex_unlock(&esw->offloads.encap_tbl_lock);
1765
1766         mlx5e_encap_dealloc(priv, e);
1767 }
1768
1769 static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
1770 {
1771         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1772
1773         if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
1774                 return;
1775         hash_del_rcu(&d->hlist);
1776         mutex_unlock(&esw->offloads.decap_tbl_lock);
1777
1778         mlx5e_decap_dealloc(priv, d);
1779 }
1780
1781 static void mlx5e_detach_encap(struct mlx5e_priv *priv,
1782                                struct mlx5e_tc_flow *flow, int out_index)
1783 {
1784         struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
1785         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1786
1787         /* flow wasn't fully initialized */
1788         if (!e)
1789                 return;
1790
1791         mutex_lock(&esw->offloads.encap_tbl_lock);
1792         list_del(&flow->encaps[out_index].list);
1793         flow->encaps[out_index].e = NULL;
1794         if (!refcount_dec_and_test(&e->refcnt)) {
1795                 mutex_unlock(&esw->offloads.encap_tbl_lock);
1796                 return;
1797         }
1798         hash_del_rcu(&e->encap_hlist);
1799         mutex_unlock(&esw->offloads.encap_tbl_lock);
1800
1801         mlx5e_encap_dealloc(priv, e);
1802 }
1803
1804 static void mlx5e_detach_decap(struct mlx5e_priv *priv,
1805                                struct mlx5e_tc_flow *flow)
1806 {
1807         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1808         struct mlx5e_decap_entry *d = flow->decap_reformat;
1809
1810         if (!d)
1811                 return;
1812
1813         mutex_lock(&esw->offloads.decap_tbl_lock);
1814         list_del(&flow->l3_to_l2_reformat);
1815         flow->decap_reformat = NULL;
1816
1817         if (!refcount_dec_and_test(&d->refcnt)) {
1818                 mutex_unlock(&esw->offloads.decap_tbl_lock);
1819                 return;
1820         }
1821         hash_del_rcu(&d->hlist);
1822         mutex_unlock(&esw->offloads.decap_tbl_lock);
1823
1824         mlx5e_decap_dealloc(priv, d);
1825 }
1826
1827 static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
1828 {
1829         struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
1830
1831         if (!flow_flag_test(flow, ESWITCH) ||
1832             !flow_flag_test(flow, DUP))
1833                 return;
1834
1835         mutex_lock(&esw->offloads.peer_mutex);
1836         list_del(&flow->peer);
1837         mutex_unlock(&esw->offloads.peer_mutex);
1838
1839         flow_flag_clear(flow, DUP);
1840
1841         if (refcount_dec_and_test(&flow->peer_flow->refcnt)) {
1842                 mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
1843                 kfree(flow->peer_flow);
1844         }
1845
1846         flow->peer_flow = NULL;
1847 }
1848
1849 static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
1850 {
1851         struct mlx5_core_dev *dev = flow->priv->mdev;
1852         struct mlx5_devcom *devcom = dev->priv.devcom;
1853         struct mlx5_eswitch *peer_esw;
1854
1855         peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
1856         if (!peer_esw)
1857                 return;
1858
1859         __mlx5e_tc_del_fdb_peer_flow(flow);
1860         mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
1861 }
1862
1863 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
1864                               struct mlx5e_tc_flow *flow)
1865 {
1866         if (mlx5e_is_eswitch_flow(flow)) {
1867                 mlx5e_tc_del_fdb_peer_flow(flow);
1868                 mlx5e_tc_del_fdb_flow(priv, flow);
1869         } else {
1870                 mlx5e_tc_del_nic_flow(priv, flow);
1871         }
1872 }
1873
1874 static bool flow_has_tc_fwd_action(struct flow_cls_offload *f)
1875 {
1876         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1877         struct flow_action *flow_action = &rule->action;
1878         const struct flow_action_entry *act;
1879         int i;
1880
1881         flow_action_for_each(i, act, flow_action) {
1882                 switch (act->id) {
1883                 case FLOW_ACTION_GOTO:
1884                         return true;
1885                 default:
1886                         continue;
1887                 }
1888         }
1889
1890         return false;
1891 }
1892
1893 static int
1894 enc_opts_is_dont_care_or_full_match(struct mlx5e_priv *priv,
1895                                     struct flow_dissector_key_enc_opts *opts,
1896                                     struct netlink_ext_ack *extack,
1897                                     bool *dont_care)
1898 {
1899         struct geneve_opt *opt;
1900         int off = 0;
1901
1902         *dont_care = true;
1903
1904         while (opts->len > off) {
1905                 opt = (struct geneve_opt *)&opts->data[off];
1906
1907                 if (!(*dont_care) || opt->opt_class || opt->type ||
1908                     memchr_inv(opt->opt_data, 0, opt->length * 4)) {
1909                         *dont_care = false;
1910
1911                         if (opt->opt_class != htons(U16_MAX) ||
1912                             opt->type != U8_MAX) {
1913                                 NL_SET_ERR_MSG(extack,
1914                                                "Partial match of tunnel options in chain > 0 isn't supported");
1915                                 netdev_warn(priv->netdev,
1916                                             "Partial match of tunnel options in chain > 0 isn't supported");
1917                                 return -EOPNOTSUPP;
1918                         }
1919                 }
1920
1921                 off += sizeof(struct geneve_opt) + opt->length * 4;
1922         }
1923
1924         return 0;
1925 }
1926
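/* Copy the dissector key identified by diss_key from the rule's match key
 * into dst.
 */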
1927 #define COPY_DISSECTOR(rule, diss_key, dst)\
1928 ({ \
1929         struct flow_rule *__rule = (rule);\
1930         typeof(dst) __dst = dst;\
1931 \
1932         memcpy(__dst,\
1933                skb_flow_dissector_target(__rule->match.dissector,\
1934                                          diss_key,\
1935                                          __rule->match.key),\
1936                sizeof(*__dst));\
1937 })
1938
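/* Map the flow's tunnel match key (and, if present, its tunnel option match)
 * to mapping IDs and combine them into a single tunnel ID. Flows on chain 0
 * write the ID to TUNNEL_TO_REG with a mod_hdr action; flows on higher
 * chains match on the ID instead.
 */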
1939 static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv,
1940                                     struct mlx5e_tc_flow *flow,
1941                                     struct flow_cls_offload *f,
1942                                     struct net_device *filter_dev)
1943 {
1944         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1945         struct netlink_ext_ack *extack = f->common.extack;
1946         struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
1947         struct flow_match_enc_opts enc_opts_match;
1948         struct tunnel_match_enc_opts tun_enc_opts;
1949         struct mlx5_rep_uplink_priv *uplink_priv;
1950         struct mlx5_flow_attr *attr = flow->attr;
1951         struct mlx5e_rep_priv *uplink_rpriv;
1952         struct tunnel_match_key tunnel_key;
1953         bool enc_opts_is_dont_care = true;
1954         u32 tun_id, enc_opts_id = 0;
1955         struct mlx5_eswitch *esw;
1956         u32 value, mask;
1957         int err;
1958
1959         esw = priv->mdev->priv.eswitch;
1960         uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1961         uplink_priv = &uplink_rpriv->uplink_priv;
1962
1963         memset(&tunnel_key, 0, sizeof(tunnel_key));
1964         COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL,
1965                        &tunnel_key.enc_control);
1966         if (tunnel_key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS)
1967                 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
1968                                &tunnel_key.enc_ipv4);
1969         else
1970                 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
1971                                &tunnel_key.enc_ipv6);
1972         COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IP, &tunnel_key.enc_ip);
1973         COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_PORTS,
1974                        &tunnel_key.enc_tp);
1975         COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_KEYID,
1976                        &tunnel_key.enc_key_id);
1977         tunnel_key.filter_ifindex = filter_dev->ifindex;
1978
1979         err = mapping_add(uplink_priv->tunnel_mapping, &tunnel_key, &tun_id);
1980         if (err)
1981                 return err;
1982
1983         flow_rule_match_enc_opts(rule, &enc_opts_match);
1984         err = enc_opts_is_dont_care_or_full_match(priv,
1985                                                   enc_opts_match.mask,
1986                                                   extack,
1987                                                   &enc_opts_is_dont_care);
1988         if (err)
1989                 goto err_enc_opts;
1990
1991         if (!enc_opts_is_dont_care) {
1992                 memset(&tun_enc_opts, 0, sizeof(tun_enc_opts));
1993                 memcpy(&tun_enc_opts.key, enc_opts_match.key,
1994                        sizeof(*enc_opts_match.key));
1995                 memcpy(&tun_enc_opts.mask, enc_opts_match.mask,
1996                        sizeof(*enc_opts_match.mask));
1997
1998                 err = mapping_add(uplink_priv->tunnel_enc_opts_mapping,
1999                                   &tun_enc_opts, &enc_opts_id);
2000                 if (err)
2001                         goto err_enc_opts;
2002         }
2003
2004         value = tun_id << ENC_OPTS_BITS | enc_opts_id;
2005         mask = enc_opts_id ? TUNNEL_ID_MASK :
2006                              (TUNNEL_ID_MASK & ~ENC_OPTS_BITS_MASK);
2007
2008         if (attr->chain) {
2009                 mlx5e_tc_match_to_reg_match(&attr->parse_attr->spec,
2010                                             TUNNEL_TO_REG, value, mask);
2011         } else {
2012                 mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
2013                 err = mlx5e_tc_match_to_reg_set(priv->mdev,
2014                                                 mod_hdr_acts, MLX5_FLOW_NAMESPACE_FDB,
2015                                                 TUNNEL_TO_REG, value);
2016                 if (err)
2017                         goto err_set;
2018
2019                 attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
2020         }
2021
2022         flow->tunnel_id = value;
2023         return 0;
2024
2025 err_set:
2026         if (enc_opts_id)
2027                 mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
2028                                enc_opts_id);
2029 err_enc_opts:
2030         mapping_remove(uplink_priv->tunnel_mapping, tun_id);
2031         return err;
2032 }
2033
2034 static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow)
2035 {
2036         u32 enc_opts_id = flow->tunnel_id & ENC_OPTS_BITS_MASK;
2037         u32 tun_id = flow->tunnel_id >> ENC_OPTS_BITS;
2038         struct mlx5_rep_uplink_priv *uplink_priv;
2039         struct mlx5e_rep_priv *uplink_rpriv;
2040         struct mlx5_eswitch *esw;
2041
2042         esw = flow->priv->mdev->priv.eswitch;
2043         uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
2044         uplink_priv = &uplink_rpriv->uplink_priv;
2045
2046         if (tun_id)
2047                 mapping_remove(uplink_priv->tunnel_mapping, tun_id);
2048         if (enc_opts_id)
2049                 mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
2050                                enc_opts_id);
2051 }
2052
2053 u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow)
2054 {
2055         return flow->tunnel_id;
2056 }
2057
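/* Prefer matching on ip_version over ethertype when the device supports it
 * and the filter fully masks n_proto as IPv4 or IPv6; otherwise fall back to
 * a plain ethertype match.
 */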
2058 void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev,
2059                             struct flow_match_basic *match, bool outer,
2060                             void *headers_c, void *headers_v)
2061 {
2062         bool ip_version_cap;
2063
2064         ip_version_cap = outer ?
2065                 MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2066                                           ft_field_support.outer_ip_version) :
2067                 MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2068                                           ft_field_support.inner_ip_version);
2069
2070         if (ip_version_cap && match->mask->n_proto == htons(0xFFFF) &&
2071             (match->key->n_proto == htons(ETH_P_IP) ||
2072              match->key->n_proto == htons(ETH_P_IPV6))) {
2073                 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_version);
2074                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version,
2075                          match->key->n_proto == htons(ETH_P_IP) ? 4 : 6);
2076         } else {
2077                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
2078                          ntohs(match->mask->n_proto));
2079                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
2080                          ntohs(match->key->n_proto));
2081         }
2082 }
2083
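/* Parse the tunnel (outer header) matches of an eswitch flow. On chain 0 the
 * tunnel headers are matched directly and a decap action is added (except for
 * MPLS over UDP); flows that need the tunnel info in later chains instead use
 * a tunnel mapping ID set up by mlx5e_get_flow_tunnel_id().
 */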
2084 static int parse_tunnel_attr(struct mlx5e_priv *priv,
2085                              struct mlx5e_tc_flow *flow,
2086                              struct mlx5_flow_spec *spec,
2087                              struct flow_cls_offload *f,
2088                              struct net_device *filter_dev,
2089                              u8 *match_level,
2090                              bool *match_inner)
2091 {
2092         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
2093         struct netlink_ext_ack *extack = f->common.extack;
2094         bool needs_mapping, sets_mapping;
2095         int err;
2096
2097         if (!mlx5e_is_eswitch_flow(flow))
2098                 return -EOPNOTSUPP;
2099
2100         needs_mapping = !!flow->attr->chain;
2101         sets_mapping = !flow->attr->chain && flow_has_tc_fwd_action(f);
2102         *match_inner = !needs_mapping;
2103
2104         if ((needs_mapping || sets_mapping) &&
2105             !mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
2106                 NL_SET_ERR_MSG(extack,
2107                                "Chains on tunnel devices aren't supported without register loopback support");
2108                 netdev_warn(priv->netdev,
2109                             "Chains on tunnel devices aren't supported without register loopback support");
2110                 return -EOPNOTSUPP;
2111         }
2112
2113         if (!flow->attr->chain) {
2114                 err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
2115                                          match_level);
2116                 if (err) {
2117                         NL_SET_ERR_MSG_MOD(extack,
2118                                            "Failed to parse tunnel attributes");
2119                         netdev_warn(priv->netdev,
2120                                     "Failed to parse tunnel attributes");
2121                         return err;
2122                 }
2123
2124                 /* With mpls over udp we decapsulate using packet reformat
2125                  * object
2126                  */
2127                 if (!netif_is_bareudp(filter_dev))
2128                         flow->attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
2129         }
2130
2131         if (!needs_mapping && !sets_mapping)
2132                 return 0;
2133
2134         return mlx5e_get_flow_tunnel_id(priv, flow, f, filter_dev);
2135 }
2136
2137 static void *get_match_inner_headers_criteria(struct mlx5_flow_spec *spec)
2138 {
2139         return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2140                             inner_headers);
2141 }
2142
2143 static void *get_match_inner_headers_value(struct mlx5_flow_spec *spec)
2144 {
2145         return MLX5_ADDR_OF(fte_match_param, spec->match_value,
2146                             inner_headers);
2147 }
2148
2149 static void *get_match_outer_headers_criteria(struct mlx5_flow_spec *spec)
2150 {
2151         return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2152                             outer_headers);
2153 }
2154
2155 static void *get_match_outer_headers_value(struct mlx5_flow_spec *spec)
2156 {
2157         return MLX5_ADDR_OF(fte_match_param, spec->match_value,
2158                             outer_headers);
2159 }
2160
2161 static void *get_match_headers_value(u32 flags,
2162                                      struct mlx5_flow_spec *spec)
2163 {
2164         return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
2165                 get_match_inner_headers_value(spec) :
2166                 get_match_outer_headers_value(spec);
2167 }
2168
2169 static void *get_match_headers_criteria(u32 flags,
2170                                         struct mlx5_flow_spec *spec)
2171 {
2172         return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
2173                 get_match_inner_headers_criteria(spec) :
2174                 get_match_outer_headers_criteria(spec);
2175 }
2176
2177 static int mlx5e_flower_parse_meta(struct net_device *filter_dev,
2178                                    struct flow_cls_offload *f)
2179 {
2180         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2181         struct netlink_ext_ack *extack = f->common.extack;
2182         struct net_device *ingress_dev;
2183         struct flow_match_meta match;
2184
2185         if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META))
2186                 return 0;
2187
2188         flow_rule_match_meta(rule, &match);
2189         if (match.mask->ingress_ifindex != 0xFFFFFFFF) {
2190                 NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask");
2191                 return -EOPNOTSUPP;
2192         }
2193
2194         ingress_dev = __dev_get_by_index(dev_net(filter_dev),
2195                                          match.key->ingress_ifindex);
2196         if (!ingress_dev) {
2197                 NL_SET_ERR_MSG_MOD(extack,
2198                                    "Can't find the ingress port to match on");
2199                 return -ENOENT;
2200         }
2201
2202         if (ingress_dev != filter_dev) {
2203                 NL_SET_ERR_MSG_MOD(extack,
2204                                    "Can't match on the ingress filter port");
2205                 return -EOPNOTSUPP;
2206         }
2207
2208         return 0;
2209 }
2210
2211 static bool skip_key_basic(struct net_device *filter_dev,
2212                            struct flow_cls_offload *f)
2213 {
2214         /* When doing mpls over udp decap, the user needs to provide
2215          * MPLS_UC as the protocol in order to be able to match on mpls
2216          * label fields.  However, the actual ethertype is IP so we want to
2217          * avoid matching on this, otherwise we'll fail the match.
2218          */
2219         if (netif_is_bareudp(filter_dev) && f->common.chain_index == 0)
2220                 return true;
2221
2222         return false;
2223 }
2224
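/* Translate the flower dissector keys of the classifier rule into mlx5 match
 * criteria and values, tracking the deepest header layer matched through the
 * inner/outer match levels.
 */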
2225 static int __parse_cls_flower(struct mlx5e_priv *priv,
2226                               struct mlx5e_tc_flow *flow,
2227                               struct mlx5_flow_spec *spec,
2228                               struct flow_cls_offload *f,
2229                               struct net_device *filter_dev,
2230                               u8 *inner_match_level, u8 *outer_match_level)
2231 {
2232         struct netlink_ext_ack *extack = f->common.extack;
2233         void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2234                                        outer_headers);
2235         void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2236                                        outer_headers);
2237         void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2238                                     misc_parameters);
2239         void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2240                                     misc_parameters);
2241         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2242         struct flow_dissector *dissector = rule->match.dissector;
2243         u16 addr_type = 0;
2244         u8 ip_proto = 0;
2245         u8 *match_level;
2246         int err;
2247
2248         match_level = outer_match_level;
2249
2250         if (dissector->used_keys &
2251             ~(BIT(FLOW_DISSECTOR_KEY_META) |
2252               BIT(FLOW_DISSECTOR_KEY_CONTROL) |
2253               BIT(FLOW_DISSECTOR_KEY_BASIC) |
2254               BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
2255               BIT(FLOW_DISSECTOR_KEY_VLAN) |
2256               BIT(FLOW_DISSECTOR_KEY_CVLAN) |
2257               BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
2258               BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
2259               BIT(FLOW_DISSECTOR_KEY_PORTS) |
2260               BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
2261               BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
2262               BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
2263               BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) |
2264               BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
2265               BIT(FLOW_DISSECTOR_KEY_TCP) |
2266               BIT(FLOW_DISSECTOR_KEY_IP)  |
2267               BIT(FLOW_DISSECTOR_KEY_CT) |
2268               BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
2269               BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) |
2270               BIT(FLOW_DISSECTOR_KEY_MPLS))) {
2271                 NL_SET_ERR_MSG_MOD(extack, "Unsupported key");
2272                 netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n",
2273                             dissector->used_keys);
2274                 return -EOPNOTSUPP;
2275         }
2276
2277         if (mlx5e_get_tc_tun(filter_dev)) {
2278                 bool match_inner = false;
2279
2280                 err = parse_tunnel_attr(priv, flow, spec, f, filter_dev,
2281                                         outer_match_level, &match_inner);
2282                 if (err)
2283                         return err;
2284
2285                 if (match_inner) {
2286                         /* header pointers should point to the inner headers
2287                          * if the packet was decapsulated already.
2288                          * outer headers are set by parse_tunnel_attr.
2289                          */
2290                         match_level = inner_match_level;
2291                         headers_c = get_match_inner_headers_criteria(spec);
2292                         headers_v = get_match_inner_headers_value(spec);
2293                 }
2294         }
2295
2296         err = mlx5e_flower_parse_meta(filter_dev, f);
2297         if (err)
2298                 return err;
2299
2300         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC) &&
2301             !skip_key_basic(filter_dev, f)) {
2302                 struct flow_match_basic match;
2303
2304                 flow_rule_match_basic(rule, &match);
2305                 mlx5e_tc_set_ethertype(priv->mdev, &match,
2306                                        match_level == outer_match_level,
2307                                        headers_c, headers_v);
2308
2309                 if (match.mask->n_proto)
2310                         *match_level = MLX5_MATCH_L2;
2311         }
2312         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN) ||
2313             is_vlan_dev(filter_dev)) {
2314                 struct flow_dissector_key_vlan filter_dev_mask;
2315                 struct flow_dissector_key_vlan filter_dev_key;
2316                 struct flow_match_vlan match;
2317
2318                 if (is_vlan_dev(filter_dev)) {
2319                         match.key = &filter_dev_key;
2320                         match.key->vlan_id = vlan_dev_vlan_id(filter_dev);
2321                         match.key->vlan_tpid = vlan_dev_vlan_proto(filter_dev);
2322                         match.key->vlan_priority = 0;
2323                         match.mask = &filter_dev_mask;
2324                         memset(match.mask, 0xff, sizeof(*match.mask));
2325                         match.mask->vlan_priority = 0;
2326                 } else {
2327                         flow_rule_match_vlan(rule, &match);
2328                 }
2329                 if (match.mask->vlan_id ||
2330                     match.mask->vlan_priority ||
2331                     match.mask->vlan_tpid) {
2332                         if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
2333                                 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2334                                          svlan_tag, 1);
2335                                 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2336                                          svlan_tag, 1);
2337                         } else {
2338                                 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2339                                          cvlan_tag, 1);
2340                                 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2341                                          cvlan_tag, 1);
2342                         }
2343
2344                         MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid,
2345                                  match.mask->vlan_id);
2346                         MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid,
2347                                  match.key->vlan_id);
2348
2349                         MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio,
2350                                  match.mask->vlan_priority);
2351                         MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio,
2352                                  match.key->vlan_priority);
2353
2354                         *match_level = MLX5_MATCH_L2;
2355                 }
2356         } else if (*match_level != MLX5_MATCH_NONE) {
2357                 /* cvlan_tag enabled in match criteria and
2358                  * disabled in match value means both S & C tags
2359                  * don't exist (untagged of both)
2360                  */
2361                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
2362                 *match_level = MLX5_MATCH_L2;
2363         }
2364
2365         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
2366                 struct flow_match_vlan match;
2367
2368                 flow_rule_match_cvlan(rule, &match);
2369                 if (match.mask->vlan_id ||
2370                     match.mask->vlan_priority ||
2371                     match.mask->vlan_tpid) {
2372                         if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
2373                                 MLX5_SET(fte_match_set_misc, misc_c,
2374                                          outer_second_svlan_tag, 1);
2375                                 MLX5_SET(fte_match_set_misc, misc_v,
2376                                          outer_second_svlan_tag, 1);
2377                         } else {
2378                                 MLX5_SET(fte_match_set_misc, misc_c,
2379                                          outer_second_cvlan_tag, 1);
2380                                 MLX5_SET(fte_match_set_misc, misc_v,
2381                                          outer_second_cvlan_tag, 1);
2382                         }
2383
2384                         MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid,
2385                                  match.mask->vlan_id);
2386                         MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid,
2387                                  match.key->vlan_id);
2388                         MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio,
2389                                  match.mask->vlan_priority);
2390                         MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio,
2391                                  match.key->vlan_priority);
2392
2393                         *match_level = MLX5_MATCH_L2;
2394                         spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
2395                 }
2396         }
2397
2398         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
2399                 struct flow_match_eth_addrs match;
2400
2401                 flow_rule_match_eth_addrs(rule, &match);
2402                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2403                                              dmac_47_16),
2404                                 match.mask->dst);
2405                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2406                                              dmac_47_16),
2407                                 match.key->dst);
2408
2409                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2410                                              smac_47_16),
2411                                 match.mask->src);
2412                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2413                                              smac_47_16),
2414                                 match.key->src);
2415
2416                 if (!is_zero_ether_addr(match.mask->src) ||
2417                     !is_zero_ether_addr(match.mask->dst))
2418                         *match_level = MLX5_MATCH_L2;
2419         }
2420
2421         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
2422                 struct flow_match_control match;
2423
2424                 flow_rule_match_control(rule, &match);
2425                 addr_type = match.key->addr_type;
2426
2427                 /* the HW doesn't support frag first/later */
2428                 if (match.mask->flags & FLOW_DIS_FIRST_FRAG)
2429                         return -EOPNOTSUPP;
2430
2431                 if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) {
2432                         MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
2433                         MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
2434                                  match.key->flags & FLOW_DIS_IS_FRAGMENT);
2435
2436                         /* the HW doesn't need L3 inline to match on frag=no */
2437                         if (!(match.key->flags & FLOW_DIS_IS_FRAGMENT))
2438                                 *match_level = MLX5_MATCH_L2;
2440                         else
2441                                 *match_level = MLX5_MATCH_L3;
2442                 }
2443         }
2444
2439         /* ***  L2 attributes parsing up to here *** */
2445         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
2446                 struct flow_match_basic match;
2447
2448                 flow_rule_match_basic(rule, &match);
2449                 ip_proto = match.key->ip_proto;
2450
2451                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
2452                          match.mask->ip_proto);
2453                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
2454                          match.key->ip_proto);
2455
2456                 if (match.mask->ip_proto)
2457                         *match_level = MLX5_MATCH_L3;
2458         }
2459
2460         if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
2461                 struct flow_match_ipv4_addrs match;
2462
2463                 flow_rule_match_ipv4_addrs(rule, &match);
2464                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2465                                     src_ipv4_src_ipv6.ipv4_layout.ipv4),
2466                        &match.mask->src, sizeof(match.mask->src));
2467                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2468                                     src_ipv4_src_ipv6.ipv4_layout.ipv4),
2469                        &match.key->src, sizeof(match.key->src));
2470                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2471                                     dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2472                        &match.mask->dst, sizeof(match.mask->dst));
2473                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2474                                     dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2475                        &match.key->dst, sizeof(match.key->dst));
2476
2477                 if (match.mask->src || match.mask->dst)
2478                         *match_level = MLX5_MATCH_L3;
2479         }
2480
2481         if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
2482                 struct flow_match_ipv6_addrs match;
2483
2484                 flow_rule_match_ipv6_addrs(rule, &match);
2485                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2486                                     src_ipv4_src_ipv6.ipv6_layout.ipv6),
2487                        &match.mask->src, sizeof(match.mask->src));
2488                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2489                                     src_ipv4_src_ipv6.ipv6_layout.ipv6),
2490                        &match.key->src, sizeof(match.key->src));
2491
2492                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2493                                     dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2494                        &match.mask->dst, sizeof(match.mask->dst));
2495                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2496                                     dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2497                        &match.key->dst, sizeof(match.key->dst));
2498
2499                 if (ipv6_addr_type(&match.mask->src) != IPV6_ADDR_ANY ||
2500                     ipv6_addr_type(&match.mask->dst) != IPV6_ADDR_ANY)
2501                         *match_level = MLX5_MATCH_L3;
2502         }
2503
2504         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
2505                 struct flow_match_ip match;
2506
2507                 flow_rule_match_ip(rule, &match);
2508                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
2509                          match.mask->tos & 0x3);
2510                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
2511                          match.key->tos & 0x3);
2512
2513                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
2514                          match.mask->tos >> 2);
2515                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
2516                          match.key->tos  >> 2);
2517
2518                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
2519                          match.mask->ttl);
2520                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
2521                          match.key->ttl);
2522
2523                 if (match.mask->ttl &&
2524                     !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
2525                                                 ft_field_support.outer_ipv4_ttl)) {
2526                         NL_SET_ERR_MSG_MOD(extack,
2527                                            "Matching on TTL is not supported");
2528                         return -EOPNOTSUPP;
2529                 }
2530
2531                 if (match.mask->tos || match.mask->ttl)
2532                         *match_level = MLX5_MATCH_L3;
2533         }
2534
2535         /* ***  L3 attributes parsing up to here *** */
2536
2537         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
2538                 struct flow_match_ports match;
2539
2540                 flow_rule_match_ports(rule, &match);
2541                 switch (ip_proto) {
2542                 case IPPROTO_TCP:
2543                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2544                                  tcp_sport, ntohs(match.mask->src));
2545                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2546                                  tcp_sport, ntohs(match.key->src));
2547
2548                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2549                                  tcp_dport, ntohs(match.mask->dst));
2550                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2551                                  tcp_dport, ntohs(match.key->dst));
2552                         break;
2553
2554                 case IPPROTO_UDP:
2555                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2556                                  udp_sport, ntohs(match.mask->src));
2557                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2558                                  udp_sport, ntohs(match.key->src));
2559
2560                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2561                                  udp_dport, ntohs(match.mask->dst));
2562                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2563                                  udp_dport, ntohs(match.key->dst));
2564                         break;
2565                 default:
2566                         NL_SET_ERR_MSG_MOD(extack,
2567                                            "Only UDP and TCP transports are supported for L4 matching");
2568                         netdev_err(priv->netdev,
2569                                    "Only UDP and TCP transports are supported\n");
2570                         return -EINVAL;
2571                 }
2572
2573                 if (match.mask->src || match.mask->dst)
2574                         *match_level = MLX5_MATCH_L4;
2575         }
2576
2577         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
2578                 struct flow_match_tcp match;
2579
2580                 flow_rule_match_tcp(rule, &match);
2581                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
2582                          ntohs(match.mask->flags));
2583                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
2584                          ntohs(match.key->flags));
2585
2586                 if (match.mask->flags)
2587                         *match_level = MLX5_MATCH_L4;
2588         }
2589
2590         return 0;
2591 }
2592
2593 static int parse_cls_flower(struct mlx5e_priv *priv,
2594                             struct mlx5e_tc_flow *flow,
2595                             struct mlx5_flow_spec *spec,
2596                             struct flow_cls_offload *f,
2597                             struct net_device *filter_dev)
2598 {
2599         u8 inner_match_level, outer_match_level, non_tunnel_match_level;
2600         struct netlink_ext_ack *extack = f->common.extack;
2601         struct mlx5_core_dev *dev = priv->mdev;
2602         struct mlx5_eswitch *esw = dev->priv.eswitch;
2603         struct mlx5e_rep_priv *rpriv = priv->ppriv;
2604         struct mlx5_eswitch_rep *rep;
2605         bool is_eswitch_flow;
2606         int err;
2607
2608         inner_match_level = MLX5_MATCH_NONE;
2609         outer_match_level = MLX5_MATCH_NONE;
2610
2611         err = __parse_cls_flower(priv, flow, spec, f, filter_dev,
2612                                  &inner_match_level, &outer_match_level);
2613         non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ?
2614                                  outer_match_level : inner_match_level;
2615
2616         is_eswitch_flow = mlx5e_is_eswitch_flow(flow);
2617         if (!err && is_eswitch_flow) {
2618                 rep = rpriv->rep;
2619                 if (rep->vport != MLX5_VPORT_UPLINK &&
2620                     (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
2621                     esw->offloads.inline_mode < non_tunnel_match_level)) {
2622                         NL_SET_ERR_MSG_MOD(extack,
2623                                            "Flow is not offloaded due to min inline setting");
2624                         netdev_warn(priv->netdev,
2625                                     "Flow is not offloaded due to min inline setting, required %d actual %d\n",
2626                                     non_tunnel_match_level, esw->offloads.inline_mode);
2627                         return -EOPNOTSUPP;
2628                 }
2629         }
2630
2631         flow->attr->inner_match_level = inner_match_level;
2632         flow->attr->outer_match_level = outer_match_level;
2633
2635         return err;
2636 }
2637
2638 struct pedit_headers {
2639         struct ethhdr  eth;
2640         struct vlan_hdr vlan;
2641         struct iphdr   ip4;
2642         struct ipv6hdr ip6;
2643         struct tcphdr  tcp;
2644         struct udphdr  udp;
2645 };
2646
2647 struct pedit_headers_action {
2648         struct pedit_headers    vals;
2649         struct pedit_headers    masks;
2650         u32                     pedits;
2651 };
2652
2653 static int pedit_header_offsets[] = {
2654         [FLOW_ACT_MANGLE_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth),
2655         [FLOW_ACT_MANGLE_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4),
2656         [FLOW_ACT_MANGLE_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6),
2657         [FLOW_ACT_MANGLE_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp),
2658         [FLOW_ACT_MANGLE_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp),
2659 };
2660
2661 #define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype])
2662
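/* Accumulate a single pedit mangle into the per-protocol masks/vals;
 * rewriting the same bits twice is rejected.
 */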
2663 static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset,
2664                          struct pedit_headers_action *hdrs)
2665 {
2666         u32 *curr_pmask, *curr_pval;
2667
2668         curr_pmask = (u32 *)(pedit_header(&hdrs->masks, hdr_type) + offset);
2669         curr_pval  = (u32 *)(pedit_header(&hdrs->vals, hdr_type) + offset);
2670
2671         if (*curr_pmask & mask)  /* disallow acting twice on the same location */
2672                 goto out_err;
2673
2674         *curr_pmask |= mask;
2675         *curr_pval  |= (val & mask);
2676
2677         return 0;
2678
2679 out_err:
2680         return -EOPNOTSUPP;
2681 }
2682
2683 struct mlx5_fields {
2684         u8  field;
2685         u8  field_bsize;
2686         u32 field_mask;
2687         u32 offset;
2688         u32 match_offset;
2689 };
2690
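/* Each OFFLOAD() entry ties a rewritable packet field to the FW set-action
 * field ID, its bit width and mask, its offset within struct pedit_headers
 * and the offset of the corresponding field in the lyr_2_4 match parameters.
 */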
2691 #define OFFLOAD(fw_field, field_bsize, field_mask, field, off, match_field) \
2692                 {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, field_bsize, field_mask, \
2693                  offsetof(struct pedit_headers, field) + (off), \
2694                  MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)}
2695
2696 /* The masked rewrite value equals the masked match value, and every bit
2697  * being rewritten is also covered by the match mask.
2698  */
2699 #define SAME_VAL_MASK(type, valp, maskp, matchvalp, matchmaskp) ({ \
2700         type matchmaskx = *(type *)(matchmaskp); \
2701         type matchvalx = *(type *)(matchvalp); \
2702         type maskx = *(type *)(maskp); \
2703         type valx = *(type *)(valp); \
2704         \
2705         (valx & maskx) == (matchvalx & matchmaskx) && !(maskx & (maskx ^ \
2706                                                                  matchmaskx)); \
2707 })
2708
2709 static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp,
2710                          void *matchmaskp, u8 bsize)
2711 {
2712         bool same = false;
2713
2714         switch (bsize) {
2715         case 8:
2716                 same = SAME_VAL_MASK(u8, valp, maskp, matchvalp, matchmaskp);
2717                 break;
2718         case 16:
2719                 same = SAME_VAL_MASK(u16, valp, maskp, matchvalp, matchmaskp);
2720                 break;
2721         case 32:
2722                 same = SAME_VAL_MASK(u32, valp, maskp, matchvalp, matchmaskp);
2723                 break;
2724         }
2725
2726         return same;
2727 }
2728
2729 static struct mlx5_fields fields[] = {
2730         OFFLOAD(DMAC_47_16, 32, U32_MAX, eth.h_dest[0], 0, dmac_47_16),
2731         OFFLOAD(DMAC_15_0,  16, U16_MAX, eth.h_dest[4], 0, dmac_15_0),
2732         OFFLOAD(SMAC_47_16, 32, U32_MAX, eth.h_source[0], 0, smac_47_16),
2733         OFFLOAD(SMAC_15_0,  16, U16_MAX, eth.h_source[4], 0, smac_15_0),
2734         OFFLOAD(ETHERTYPE,  16, U16_MAX, eth.h_proto, 0, ethertype),
2735         OFFLOAD(FIRST_VID,  16, U16_MAX, vlan.h_vlan_TCI, 0, first_vid),
2736
2737         OFFLOAD(IP_DSCP, 8,    0xfc, ip4.tos,   0, ip_dscp),
2738         OFFLOAD(IP_TTL,  8,  U8_MAX, ip4.ttl,   0, ttl_hoplimit),
2739         OFFLOAD(SIPV4,  32, U32_MAX, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4),
2740         OFFLOAD(DIPV4,  32, U32_MAX, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2741
2742         OFFLOAD(SIPV6_127_96, 32, U32_MAX, ip6.saddr.s6_addr32[0], 0,
2743                 src_ipv4_src_ipv6.ipv6_layout.ipv6[0]),
2744         OFFLOAD(SIPV6_95_64,  32, U32_MAX, ip6.saddr.s6_addr32[1], 0,
2745                 src_ipv4_src_ipv6.ipv6_layout.ipv6[4]),
2746         OFFLOAD(SIPV6_63_32,  32, U32_MAX, ip6.saddr.s6_addr32[2], 0,
2747                 src_ipv4_src_ipv6.ipv6_layout.ipv6[8]),
2748         OFFLOAD(SIPV6_31_0,   32, U32_MAX, ip6.saddr.s6_addr32[3], 0,
2749                 src_ipv4_src_ipv6.ipv6_layout.ipv6[12]),
2750         OFFLOAD(DIPV6_127_96, 32, U32_MAX, ip6.daddr.s6_addr32[0], 0,
2751                 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[0]),
2752         OFFLOAD(DIPV6_95_64,  32, U32_MAX, ip6.daddr.s6_addr32[1], 0,
2753                 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[4]),
2754         OFFLOAD(DIPV6_63_32,  32, U32_MAX, ip6.daddr.s6_addr32[2], 0,
2755                 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[8]),
2756         OFFLOAD(DIPV6_31_0,   32, U32_MAX, ip6.daddr.s6_addr32[3], 0,
2757                 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]),
2758         OFFLOAD(IPV6_HOPLIMIT, 8,  U8_MAX, ip6.hop_limit, 0, ttl_hoplimit),
2759         OFFLOAD(IP_DSCP, 16,  0xc00f, ip6, 0, ip_dscp),
2760
2761         OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source,  0, tcp_sport),
2762         OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest,    0, tcp_dport),
2763         /* in the linux tcphdr, tcp_flags is an 8-bit field (byte 13, ack_seq + 5) */
2764         OFFLOAD(TCP_FLAGS,  8,  U8_MAX, tcp.ack_seq, 5, tcp_flags),
2765
2766         OFFLOAD(UDP_SPORT, 16, U16_MAX, udp.source, 0, udp_sport),
2767         OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest,   0, udp_dport),
2768 };
2769
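/* Pedit masks are laid out in network byte order; convert a 16/32-bit mask to
 * little endian so the generic bit-search helpers below can be applied to it.
 */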
2770 static unsigned long mask_to_le(unsigned long mask, int size)
2771 {
2772         __be32 mask_be32;
2773         __be16 mask_be16;
2774
2775         if (size == 32) {
2776                 mask_be32 = (__force __be32)(mask);
2777                 mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
2778         } else if (size == 16) {
2779                 mask_be32 = (__force __be32)(mask);
2780                 mask_be16 = *(__be16 *)&mask_be32;
2781                 mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
2782         }
2783
2784         return mask;
2785 }
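
/* Walk the fields[] table and translate every pending pedit set/add mask in
 * @hdrs into an mlx5 modify-header action, skipping rewrites that would not
 * change the packet and rejecting partial sub-field rewrites.
 */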
2786 static int offload_pedit_fields(struct mlx5e_priv *priv,
2787                                 int namespace,
2788                                 struct pedit_headers_action *hdrs,
2789                                 struct mlx5e_tc_flow_parse_attr *parse_attr,
2790                                 u32 *action_flags,
2791                                 struct netlink_ext_ack *extack)
2792 {
2793         struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
2794         int i, action_size, first, last, next_z;
2795         void *headers_c, *headers_v, *action, *vals_p;
2796         u32 *s_masks_p, *a_masks_p, s_mask, a_mask;
2797         struct mlx5e_tc_mod_hdr_acts *mod_acts;
2798         struct mlx5_fields *f;
2799         unsigned long mask, field_mask;
2800         int err;
2801         u8 cmd;
2802
2803         mod_acts = &parse_attr->mod_hdr_acts;
2804         headers_c = get_match_headers_criteria(*action_flags, &parse_attr->spec);
2805         headers_v = get_match_headers_value(*action_flags, &parse_attr->spec);
2806
2807         set_masks = &hdrs[0].masks;
2808         add_masks = &hdrs[1].masks;
2809         set_vals = &hdrs[0].vals;
2810         add_vals = &hdrs[1].vals;
2811
2812         action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto);
2813
2814         for (i = 0; i < ARRAY_SIZE(fields); i++) {
2815                 bool skip;
2816
2817                 f = &fields[i];
2818                 /* avoid seeing bits set from previous iterations */
2819                 s_mask = 0;
2820                 a_mask = 0;
2821
2822                 s_masks_p = (void *)set_masks + f->offset;
2823                 a_masks_p = (void *)add_masks + f->offset;
2824
2825                 s_mask = *s_masks_p & f->field_mask;
2826                 a_mask = *a_masks_p & f->field_mask;
2827
2828                 if (!s_mask && !a_mask) /* nothing to offload here */
2829                         continue;
2830
2831                 if (s_mask && a_mask) {
2832                         NL_SET_ERR_MSG_MOD(extack,
2833                                            "can't set and add to the same HW field");
2834                         printk(KERN_WARNING "mlx5: can't set and add to the same HW field (%x)\n", f->field);
2835                         return -EOPNOTSUPP;
2836                 }
2837
2838                 skip = false;
2839                 if (s_mask) {
2840                         void *match_mask = headers_c + f->match_offset;
2841                         void *match_val = headers_v + f->match_offset;
2842
2843                         cmd  = MLX5_ACTION_TYPE_SET;
2844                         mask = s_mask;
2845                         vals_p = (void *)set_vals + f->offset;
2846                         /* don't rewrite if we have a match on the same value */
2847                         if (cmp_val_mask(vals_p, s_masks_p, match_val,
2848                                          match_mask, f->field_bsize))
2849                                 skip = true;
2850                         /* clear to denote we consumed this field */
2851                         *s_masks_p &= ~f->field_mask;
2852                 } else {
2853                         cmd  = MLX5_ACTION_TYPE_ADD;
2854                         mask = a_mask;
2855                         vals_p = (void *)add_vals + f->offset;
2856                         /* add 0 is no change */
2857                         if ((*(u32 *)vals_p & f->field_mask) == 0)
2858                                 skip = true;
2859                         /* clear to denote we consumed this field */
2860                         *a_masks_p &= ~f->field_mask;
2861                 }
2862                 if (skip)
2863                         continue;
2864
2865                 mask = mask_to_le(mask, f->field_bsize);
2866
2867                 first = find_first_bit(&mask, f->field_bsize);
2868                 next_z = find_next_zero_bit(&mask, f->field_bsize, first);
2869                 last  = find_last_bit(&mask, f->field_bsize);
2870                 if (first < next_z && next_z < last) {
2871                         NL_SET_ERR_MSG_MOD(extack,
2872                                            "rewrite of few sub-fields isn't supported");
2873                         printk(KERN_WARNING "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n",
2874                                mask);
2875                         return -EOPNOTSUPP;
2876                 }
2877
2878                 err = alloc_mod_hdr_actions(priv->mdev, namespace, mod_acts);
2879                 if (err) {
2880                         NL_SET_ERR_MSG_MOD(extack,
2881                                            "too many pedit actions, can't offload");
2882                         mlx5_core_warn(priv->mdev,
2883                                        "mlx5: parsed %d pedit actions, can't do more\n",
2884                                        mod_acts->num_actions);
2885                         return err;
2886                 }
2887
2888                 action = mod_acts->actions +
2889                          (mod_acts->num_actions * action_size);
2890                 MLX5_SET(set_action_in, action, action_type, cmd);
2891                 MLX5_SET(set_action_in, action, field, f->field);
2892
2893                 if (cmd == MLX5_ACTION_TYPE_SET) {
2894                         int start;
2895
2896                         field_mask = mask_to_le(f->field_mask, f->field_bsize);
2897
2898                         /* a field narrower than the word may not start at the first bit */
2899                         start = find_first_bit(&field_mask, f->field_bsize);
2900
2901                         MLX5_SET(set_action_in, action, offset, first - start);
2902                         /* length is num of bits to be written, zero means length of 32 */
2903                         MLX5_SET(set_action_in, action, length, (last - first + 1));
2904                 }
2905
2906                 if (f->field_bsize == 32)
2907                         MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first);
2908                 else if (f->field_bsize == 16)
2909                         MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first);
2910                 else if (f->field_bsize == 8)
2911                         MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first);
2912
2913                 ++mod_acts->num_actions;
2914         }
2915
2916         return 0;
2917 }
2918
2919 static int mlx5e_flow_namespace_max_modify_action(struct mlx5_core_dev *mdev,
2920                                                   int namespace)
2921 {
2922         if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */
2923                 return MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, max_modify_header_actions);
2924         else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */
2925                 return MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_modify_header_actions);
2926 }
2927
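/* Grow the modify-header action array on demand, doubling its size up to the
 * per-namespace HW limit; returns -ENOSPC once that limit is reached.
 */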
2928 int alloc_mod_hdr_actions(struct mlx5_core_dev *mdev,
2929                           int namespace,
2930                           struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
2931 {
2932         int action_size, new_num_actions, max_hw_actions;
2933         size_t new_sz, old_sz;
2934         void *ret;
2935
2936         if (mod_hdr_acts->num_actions < mod_hdr_acts->max_actions)
2937                 return 0;
2938
2939         action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto);
2940
2941         max_hw_actions = mlx5e_flow_namespace_max_modify_action(mdev,
2942                                                                 namespace);
2943         new_num_actions = min(max_hw_actions,
2944                               mod_hdr_acts->actions ?
2945                               mod_hdr_acts->max_actions * 2 : 1);
2946         if (mod_hdr_acts->max_actions == new_num_actions)
2947                 return -ENOSPC;
2948
2949         new_sz = action_size * new_num_actions;
2950         old_sz = mod_hdr_acts->max_actions * action_size;
2951         ret = krealloc(mod_hdr_acts->actions, new_sz, GFP_KERNEL);
2952         if (!ret)
2953                 return -ENOMEM;
2954
2955         memset(ret + old_sz, 0, new_sz - old_sz);
2956         mod_hdr_acts->actions = ret;
2957         mod_hdr_acts->max_actions = new_num_actions;
2958
2959         return 0;
2960 }
2961
2962 void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
2963 {
2964         kfree(mod_hdr_acts->actions);
2965         mod_hdr_acts->actions = NULL;
2966         mod_hdr_acts->num_actions = 0;
2967         mod_hdr_acts->max_actions = 0;
2968 }
2969
2970 static const struct pedit_headers zero_masks = {};
2971
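/* Record a single pedit (mangle/add) key in the set/add accumulators that are
 * later flushed into modify-header actions by alloc_tc_pedit_action().
 */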
2972 static int
2973 parse_pedit_to_modify_hdr(struct mlx5e_priv *priv,
2974                           const struct flow_action_entry *act, int namespace,
2975                           struct mlx5e_tc_flow_parse_attr *parse_attr,
2976                           struct pedit_headers_action *hdrs,
2977                           struct netlink_ext_ack *extack)
2978 {
2979         u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1;
2980         int err = -EOPNOTSUPP;
2981         u32 mask, val, offset;
2982         u8 htype;
2983
2984         htype = act->mangle.htype;
2985         err = -EOPNOTSUPP; /* can't be all optimistic */
2986
2987         if (htype == FLOW_ACT_MANGLE_UNSPEC) {
2988                 NL_SET_ERR_MSG_MOD(extack, "legacy pedit isn't offloaded");
2989                 goto out_err;
2990         }
2991
2992         if (!mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace)) {
2993                 NL_SET_ERR_MSG_MOD(extack,
2994                                    "The pedit offload action is not supported");
2995                 goto out_err;
2996         }
2997
2998         mask = act->mangle.mask;
2999         val = act->mangle.val;
3000         offset = act->mangle.offset;
3001
3002         err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd]);
3003         if (err)
3004                 goto out_err;
3005
3006         hdrs[cmd].pedits++;
3007
3008         return 0;
3009 out_err:
3010         return err;
3011 }
3012
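/* For L3-to-L2 decap flows the mangled Ethernet bytes are not rewritten by
 * the HW; they are used to build the L2 header of the reformat action instead.
 */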
3013 static int
3014 parse_pedit_to_reformat(struct mlx5e_priv *priv,
3015                         const struct flow_action_entry *act,
3016                         struct mlx5e_tc_flow_parse_attr *parse_attr,
3017                         struct netlink_ext_ack *extack)
3018 {
3019         u32 mask, val, offset;
3020         u32 *p;
3021
3022         if (act->id != FLOW_ACTION_MANGLE)
3023                 return -EOPNOTSUPP;
3024
3025         if (act->mangle.htype != FLOW_ACT_MANGLE_HDR_TYPE_ETH) {
3026                 NL_SET_ERR_MSG_MOD(extack, "Only Ethernet modification is supported");
3027                 return -EOPNOTSUPP;
3028         }
3029
3030         mask = ~act->mangle.mask;
3031         val = act->mangle.val;
3032         offset = act->mangle.offset;
3033         p = (u32 *)&parse_attr->eth;
3034         *(p + (offset >> 2)) |= (val & mask);
3035
3036         return 0;
3037 }
3038
3039 static int parse_tc_pedit_action(struct mlx5e_priv *priv,
3040                                  const struct flow_action_entry *act, int namespace,
3041                                  struct mlx5e_tc_flow_parse_attr *parse_attr,
3042                                  struct pedit_headers_action *hdrs,
3043                                  struct mlx5e_tc_flow *flow,
3044                                  struct netlink_ext_ack *extack)
3045 {
3046         if (flow && flow_flag_test(flow, L3_TO_L2_DECAP))
3047                 return parse_pedit_to_reformat(priv, act, parse_attr, extack);
3048
3049         return parse_pedit_to_modify_hdr(priv, act, namespace,
3050                                          parse_attr, hdrs, extack);
3051 }
3052
3053 static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
3054                                  struct mlx5e_tc_flow_parse_attr *parse_attr,
3055                                  struct pedit_headers_action *hdrs,
3056                                  u32 *action_flags,
3057                                  struct netlink_ext_ack *extack)
3058 {
3059         struct pedit_headers *cmd_masks;
3060         int err;
3061         u8 cmd;
3062
3063         err = offload_pedit_fields(priv, namespace, hdrs, parse_attr,
3064                                    action_flags, extack);
3065         if (err < 0)
3066                 goto out_dealloc_parsed_actions;
3067
3068         for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
3069                 cmd_masks = &hdrs[cmd].masks;
3070                 if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
3071                         NL_SET_ERR_MSG_MOD(extack,
3072                                            "attempt to offload an unsupported field");
3073                         netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd);
3074                         print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
3075                                        16, 1, cmd_masks, sizeof(zero_masks), true);
3076                         err = -EOPNOTSUPP;
3077                         goto out_dealloc_parsed_actions;
3078                 }
3079         }
3080
3081         return 0;
3082
3083 out_dealloc_parsed_actions:
3084         dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
3085         return err;
3086 }
3087
3088 static bool csum_offload_supported(struct mlx5e_priv *priv,
3089                                    u32 action,
3090                                    u32 update_flags,
3091                                    struct netlink_ext_ack *extack)
3092 {
3093         u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP |
3094                          TCA_CSUM_UPDATE_FLAG_UDP;
3095
3096         /* The HW recalculates checksums only when re-writing headers */
3097         if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) {
3098                 NL_SET_ERR_MSG_MOD(extack,
3099                                    "TC csum action is only offloaded with pedit");
3100                 netdev_warn(priv->netdev,
3101                             "TC csum action is only offloaded with pedit\n");
3102                 return false;
3103         }
3104
3105         if (update_flags & ~prot_flags) {
3106                 NL_SET_ERR_MSG_MOD(extack,
3107                                    "can't offload TC csum action for some header/s");
3108                 netdev_warn(priv->netdev,
3109                             "can't offload TC csum action for some header/s - flags %#x\n",
3110                             update_flags);
3111                 return false;
3112         }
3113
3114         return true;
3115 }
3116
3117 struct ip_ttl_word {
3118         __u8    ttl;
3119         __u8    protocol;
3120         __sum16 check;
3121 };
3122
3123 struct ipv6_hoplimit_word {
3124         __be16  payload_len;
3125         __u8    nexthdr;
3126         __u8    hop_limit;
3127 };
3128
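/* Classify a single pedit key: note whether it touches IP header fields other
 * than ttl/hop_limit and whether it rewrites the 5-tuple, which cannot be
 * combined with action ct.
 */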
3129 static int is_action_keys_supported(const struct flow_action_entry *act,
3130                                     bool ct_flow, bool *modify_ip_header,
3131                                     bool *modify_tuple,
3132                                     struct netlink_ext_ack *extack)
3133 {
3134         u32 mask, offset;
3135         u8 htype;
3136
3137         htype = act->mangle.htype;
3138         offset = act->mangle.offset;
3139         mask = ~act->mangle.mask;
3140         /* For the IPv4 and IPv6 headers, check the full 4-byte word to
3141          * determine whether the modified fields are anything other than
3142          * ttl / hop_limit.
3143          */
3144         if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) {
3145                 struct ip_ttl_word *ttl_word =
3146                         (struct ip_ttl_word *)&mask;
3147
3148                 if (offset != offsetof(struct iphdr, ttl) ||
3149                     ttl_word->protocol ||
3150                     ttl_word->check) {
3151                         *modify_ip_header = true;
3152                 }
3153
3154                 if (offset >= offsetof(struct iphdr, saddr))
3155                         *modify_tuple = true;
3156
3157                 if (ct_flow && *modify_tuple) {
3158                         NL_SET_ERR_MSG_MOD(extack,
3159                                            "can't offload re-write of ipv4 address with action ct");
3160                         return -EOPNOTSUPP;
3161                 }
3162         } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) {
3163                 struct ipv6_hoplimit_word *hoplimit_word =
3164                         (struct ipv6_hoplimit_word *)&mask;
3165
3166                 if (offset != offsetof(struct ipv6hdr, payload_len) ||
3167                     hoplimit_word->payload_len ||
3168                     hoplimit_word->nexthdr) {
3169                         *modify_ip_header = true;
3170                 }
3171
3172                 if (ct_flow && offset >= offsetof(struct ipv6hdr, saddr))
3173                         *modify_tuple = true;
3174
3175                 if (ct_flow && *modify_tuple) {
3176                         NL_SET_ERR_MSG_MOD(extack,
3177                                            "can't offload re-write of ipv6 address with action ct");
3178                         return -EOPNOTSUPP;
3179                 }
3180         } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_TCP ||
3181                    htype == FLOW_ACT_MANGLE_HDR_TYPE_UDP) {
3182                 *modify_tuple = true;
3183                 if (ct_flow) {
3184                         NL_SET_ERR_MSG_MOD(extack,
3185                                            "can't offload re-write of transport header ports with action ct");
3186                         return -EOPNOTSUPP;
3187                 }
3188         }
3189
3190         return 0;
3191 }
3192
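/* Check that the requested header rewrites can be offloaded given the flow's
 * matches: IP header rewrites on protocols other than TCP/UDP/ICMP and tuple
 * rewrites that clash with conntrack state are rejected.
 */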
3193 static bool modify_header_match_supported(struct mlx5e_priv *priv,
3194                                           struct mlx5_flow_spec *spec,
3195                                           struct flow_action *flow_action,
3196                                           u32 actions, bool ct_flow,
3197                                           bool ct_clear,
3198                                           struct netlink_ext_ack *extack)
3199 {
3200         const struct flow_action_entry *act;
3201         bool modify_ip_header, modify_tuple;
3202         void *headers_c;
3203         void *headers_v;
3204         u16 ethertype;
3205         u8 ip_proto;
3206         int i, err;
3207
3208         headers_c = get_match_headers_criteria(actions, spec);
3209         headers_v = get_match_headers_value(actions, spec);
3210         ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
3211
3212         /* for non-IP we only re-write MACs, so we're okay */
3213         if (MLX5_GET(fte_match_set_lyr_2_4, headers_c, ip_version) == 0 &&
3214             ethertype != ETH_P_IP && ethertype != ETH_P_IPV6)
3215                 goto out_ok;
3216
3217         modify_ip_header = false;
3218         modify_tuple = false;
3219         flow_action_for_each(i, act, flow_action) {
3220                 if (act->id != FLOW_ACTION_MANGLE &&
3221                     act->id != FLOW_ACTION_ADD)
3222                         continue;
3223
3224                 err = is_action_keys_supported(act, ct_flow,
3225                                                &modify_ip_header,
3226                                                &modify_tuple, extack);
3227                 if (err)
3228                         return false;
3229         }
3230
3231         /* Add a ct_state=-trk match so the rule is offloaded only for non-ct
3232          * flows (or after a clear action); otherwise, once the tuple is
3233          * rewritten, the ct state can no longer be restored.
3234          */
3235         if (!ct_clear && modify_tuple &&
3236             mlx5_tc_ct_add_no_trk_match(spec)) {
3237                 NL_SET_ERR_MSG_MOD(extack,
3238                                    "can't offload tuple modify header with ct matches");
3239                 netdev_info(priv->netdev,
3240                             "can't offload tuple modify header with ct matches\n");
3241                 return false;
3242         }
3243
3244         ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
3245         if (modify_ip_header && ip_proto != IPPROTO_TCP &&
3246             ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) {
3247                 NL_SET_ERR_MSG_MOD(extack,
3248                                    "can't offload re-write of non TCP/UDP");
3249                 netdev_info(priv->netdev, "can't offload re-write of ip proto %d\n",
3250                             ip_proto);
3251                 return false;
3252         }
3253
3254 out_ok:
3255         return true;
3256 }
3257
3258 static bool actions_match_supported(struct mlx5e_priv *priv,
3259                                     struct flow_action *flow_action,
3260                                     struct mlx5e_tc_flow_parse_attr *parse_attr,
3261                                     struct mlx5e_tc_flow *flow,
3262                                     struct netlink_ext_ack *extack)
3263 {
3264         bool ct_flow = false, ct_clear = false;
3265         u32 actions;
3266
3267         ct_clear = flow->attr->ct_attr.ct_action &
3268                 TCA_CT_ACT_CLEAR;
3269         ct_flow = flow_flag_test(flow, CT) && !ct_clear;
3270         actions = flow->attr->action;
3271
3272         if (mlx5e_is_eswitch_flow(flow)) {
3273                 if (flow->attr->esw_attr->split_count && ct_flow) {
3274                         /* All registers used by ct are cleared when using
3275                          * split rules.
3276                          */
3277                         NL_SET_ERR_MSG_MOD(extack,
3278                                            "Can't offload mirroring with action ct");
3279                         return false;
3280                 }
3281         }
3282
3283         if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
3284                 return modify_header_match_supported(priv, &parse_attr->spec,
3285                                                      flow_action, actions,
3286                                                      ct_flow, ct_clear,
3287                                                      extack);
3288
3289         return true;
3290 }
3291
3292 static bool same_port_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
3293 {
3294         return priv->mdev == peer_priv->mdev;
3295 }
3296
3297 static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
3298 {
3299         struct mlx5_core_dev *fmdev, *pmdev;
3300         u64 fsystem_guid, psystem_guid;
3301
3302         fmdev = priv->mdev;
3303         pmdev = peer_priv->mdev;
3304
3305         fsystem_guid = mlx5_query_nic_system_image_guid(fmdev);
3306         psystem_guid = mlx5_query_nic_system_image_guid(pmdev);
3307
3308         return (fsystem_guid == psystem_guid);
3309 }
3310
3311 static int add_vlan_rewrite_action(struct mlx5e_priv *priv, int namespace,
3312                                    const struct flow_action_entry *act,
3313                                    struct mlx5e_tc_flow_parse_attr *parse_attr,
3314                                    struct pedit_headers_action *hdrs,
3315                                    u32 *action, struct netlink_ext_ack *extack)
3316 {
3317         u16 mask16 = VLAN_VID_MASK;
3318         u16 val16 = act->vlan.vid & VLAN_VID_MASK;
3319         const struct flow_action_entry pedit_act = {
3320                 .id = FLOW_ACTION_MANGLE,
3321                 .mangle.htype = FLOW_ACT_MANGLE_HDR_TYPE_ETH,
3322                 .mangle.offset = offsetof(struct vlan_ethhdr, h_vlan_TCI),
3323                 .mangle.mask = ~(u32)be16_to_cpu(*(__be16 *)&mask16),
3324                 .mangle.val = (u32)be16_to_cpu(*(__be16 *)&val16),
3325         };
3326         u8 match_prio_mask, match_prio_val;
3327         void *headers_c, *headers_v;
3328         int err;
3329
3330         headers_c = get_match_headers_criteria(*action, &parse_attr->spec);
3331         headers_v = get_match_headers_value(*action, &parse_attr->spec);
3332
3333         if (!(MLX5_GET(fte_match_set_lyr_2_4, headers_c, cvlan_tag) &&
3334               MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag))) {
3335                 NL_SET_ERR_MSG_MOD(extack,
3336                                    "VLAN rewrite action must have VLAN protocol match");
3337                 return -EOPNOTSUPP;
3338         }
3339
3340         match_prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
3341         match_prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
3342         if (act->vlan.prio != (match_prio_val & match_prio_mask)) {
3343                 NL_SET_ERR_MSG_MOD(extack,
3344                                    "Changing VLAN prio is not supported");
3345                 return -EOPNOTSUPP;
3346         }
3347
3348         err = parse_tc_pedit_action(priv, &pedit_act, namespace, parse_attr, hdrs, NULL, extack);
3349         *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3350
3351         return err;
3352 }
3353
3354 static int
3355 add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv,
3356                                  struct mlx5e_tc_flow_parse_attr *parse_attr,
3357                                  struct pedit_headers_action *hdrs,
3358                                  u32 *action, struct netlink_ext_ack *extack)
3359 {
3360         const struct flow_action_entry prio_tag_act = {
3361                 .vlan.vid = 0,
3362                 .vlan.prio =
3363                         MLX5_GET(fte_match_set_lyr_2_4,
3364                                  get_match_headers_value(*action,
3365                                                          &parse_attr->spec),
3366                                  first_prio) &
3367                         MLX5_GET(fte_match_set_lyr_2_4,
3368                                  get_match_headers_criteria(*action,
3369                                                             &parse_attr->spec),
3370                                  first_prio),
3371         };
3372
3373         return add_vlan_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB,
3374                                        &prio_tag_act, parse_attr, hdrs, action,
3375                                        extack);
3376 }
3377
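/* Validate a goto-chain action: not allowed for FT flows, the destination
 * chain must be within the supported range, and backward goto or goto
 * combined with reformat/decap requires the relevant capability.
 */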
3378 static int validate_goto_chain(struct mlx5e_priv *priv,
3379                                struct mlx5e_tc_flow *flow,
3380                                const struct flow_action_entry *act,
3381                                u32 actions,
3382                                struct netlink_ext_ack *extack)
3383 {
3384         bool is_esw = mlx5e_is_eswitch_flow(flow);
3385         struct mlx5_flow_attr *attr = flow->attr;
3386         bool ft_flow = mlx5e_is_ft_flow(flow);
3387         u32 dest_chain = act->chain_index;
3388         struct mlx5_fs_chains *chains;
3389         struct mlx5_eswitch *esw;
3390         u32 reformat_and_fwd;
3391         u32 max_chain;
3392
3393         esw = priv->mdev->priv.eswitch;
3394         chains = is_esw ? esw_chains(esw) : nic_chains(priv);
3395         max_chain = mlx5_chains_get_chain_range(chains);
3396         reformat_and_fwd = is_esw ?
3397                            MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_and_fwd_to_table) :
3398                            MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, reformat_and_fwd_to_table);
3399
3400         if (ft_flow) {
3401                 NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported");
3402                 return -EOPNOTSUPP;
3403         }
3404
3405         if (!mlx5_chains_backwards_supported(chains) &&
3406             dest_chain <= attr->chain) {
3407                 NL_SET_ERR_MSG_MOD(extack,
3408                                    "Goto lower numbered chain isn't supported");
3409                 return -EOPNOTSUPP;
3410         }
3411
3412         if (dest_chain > max_chain) {
3413                 NL_SET_ERR_MSG_MOD(extack,
3414                                    "Requested destination chain is out of supported range");
3415                 return -EOPNOTSUPP;
3416         }
3417
3418         if (actions & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
3419                        MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
3420             !reformat_and_fwd) {
3421                 NL_SET_ERR_MSG_MOD(extack,
3422                                    "Goto chain is not allowed if action has reformat or decap");
3423                 return -EOPNOTSUPP;
3424         }
3425
3426         return 0;
3427 }
3428
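/* Parse the TC actions of a NIC (non-eswitch) flow into a mlx5_nic_flow_attr,
 * building pedit accumulators on the way and flushing them into modify-header
 * actions at the end.
 */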
3429 static int parse_tc_nic_actions(struct mlx5e_priv *priv,
3430                                 struct flow_action *flow_action,
3431                                 struct mlx5e_tc_flow_parse_attr *parse_attr,
3432                                 struct mlx5e_tc_flow *flow,
3433                                 struct netlink_ext_ack *extack)
3434 {
3435         struct mlx5_flow_attr *attr = flow->attr;
3436         struct pedit_headers_action hdrs[2] = {};
3437         const struct flow_action_entry *act;
3438         struct mlx5_nic_flow_attr *nic_attr;
3439         u32 action = 0;
3440         int err, i;
3441
3442         if (!flow_action_has_entries(flow_action))
3443                 return -EINVAL;
3444
3445         if (!flow_action_hw_stats_check(flow_action, extack,
3446                                         FLOW_ACTION_HW_STATS_DELAYED_BIT))
3447                 return -EOPNOTSUPP;
3448
3449         nic_attr = attr->nic_attr;
3450
3451         nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
3452
3453         flow_action_for_each(i, act, flow_action) {
3454                 switch (act->id) {
3455                 case FLOW_ACTION_ACCEPT:
3456                         action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
3457                                   MLX5_FLOW_CONTEXT_ACTION_COUNT;
3458                         break;
3459                 case FLOW_ACTION_DROP:
3460                         action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
3461                         if (MLX5_CAP_FLOWTABLE(priv->mdev,
3462                                                flow_table_properties_nic_receive.flow_counter))
3463                                 action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
3464                         break;
3465                 case FLOW_ACTION_MANGLE:
3466                 case FLOW_ACTION_ADD:
3467                         err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_KERNEL,
3468                                                     parse_attr, hdrs, NULL, extack);
3469                         if (err)
3470                                 return err;
3471
3472                         action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3473                         break;
3474                 case FLOW_ACTION_VLAN_MANGLE:
3475                         err = add_vlan_rewrite_action(priv,
3476                                                       MLX5_FLOW_NAMESPACE_KERNEL,
3477                                                       act, parse_attr, hdrs,
3478                                                       &action, extack);
3479                         if (err)
3480                                 return err;
3481
3482                         break;
3483                 case FLOW_ACTION_CSUM:
3484                         if (csum_offload_supported(priv, action,
3485                                                    act->csum_flags,
3486                                                    extack))
3487                                 break;
3488
3489                         return -EOPNOTSUPP;
3490                 case FLOW_ACTION_REDIRECT: {
3491                         struct net_device *peer_dev = act->dev;
3492
3493                         if (priv->netdev->netdev_ops == peer_dev->netdev_ops &&
3494                             same_hw_devs(priv, netdev_priv(peer_dev))) {
3495                                 parse_attr->mirred_ifindex[0] = peer_dev->ifindex;
3496                                 flow_flag_set(flow, HAIRPIN);
3497                                 action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
3498                                           MLX5_FLOW_CONTEXT_ACTION_COUNT;
3499                         } else {
3500                                 NL_SET_ERR_MSG_MOD(extack,
3501                                                    "device is not on same HW, can't offload");
3502                                 netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n",
3503                                             peer_dev->name);
3504                                 return -EINVAL;
3505                         }
3506                         }
3507                         break;
3508                 case FLOW_ACTION_MARK: {
3509                         u32 mark = act->mark;
3510
3511                         if (mark & ~MLX5E_TC_FLOW_ID_MASK) {
3512                                 NL_SET_ERR_MSG_MOD(extack,
3513                                                    "Bad flow mark - only 16 bit is supported");
3514                                 return -EINVAL;
3515                         }
3516
3517                         nic_attr->flow_tag = mark;
3518                         action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3519                         }
3520                         break;
3521                 case FLOW_ACTION_GOTO:
3522                         err = validate_goto_chain(priv, flow, act, action,
3523                                                   extack);
3524                         if (err)
3525                                 return err;
3526
3527                         action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
3528                         attr->dest_chain = act->chain_index;
3529                         break;
3530                 case FLOW_ACTION_CT:
3531                         err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr, act, extack);
3532                         if (err)
3533                                 return err;
3534
3535                         flow_flag_set(flow, CT);
3536                         break;
3537                 default:
3538                         NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
3539                         return -EOPNOTSUPP;
3540                 }
3541         }
3542
3543         if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits ||
3544             hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) {
3545                 err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_KERNEL,
3546                                             parse_attr, hdrs, &action, extack);
3547                 if (err)
3548                         return err;
3549                 /* in case all pedit actions are skipped, remove the MOD_HDR
3550                  * flag.
3551                  */
3552                 if (parse_attr->mod_hdr_acts.num_actions == 0) {
3553                         action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3554                         dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
3555                 }
3556         }
3557
3558         attr->action = action;
3559
3560         if (attr->dest_chain) {
3561                 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
3562                         NL_SET_ERR_MSG(extack, "Mirroring goto chain rules isn't supported");
3563                         return -EOPNOTSUPP;
3564                 }
3565                 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3566         }
3567
3568         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
3569                 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3570
3571         if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack))
3572                 return -EOPNOTSUPP;
3573
3574         return 0;
3575 }
3576
3577 struct encap_key {
3578         const struct ip_tunnel_key *ip_tun_key;
3579         struct mlx5e_tc_tunnel *tc_tunnel;
3580 };
3581
3582 static inline int cmp_encap_info(struct encap_key *a,
3583                                  struct encap_key *b)
3584 {
3585         return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) ||
3586                a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type;
3587 }
3588
3589 static inline int cmp_decap_info(struct mlx5e_decap_key *a,
3590                                  struct mlx5e_decap_key *b)
3591 {
3592         return memcmp(&a->key, &b->key, sizeof(b->key));
3593 }
3594
3595 static inline int hash_encap_info(struct encap_key *key)
3596 {
3597         return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
3598                      key->tc_tunnel->tunnel_type);
3599 }
3600
3601 static inline int hash_decap_info(struct mlx5e_decap_key *key)
3602 {
3603         return jhash(&key->key, sizeof(key->key), 0);
3604 }
3605
3606 static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv,
3607                                   struct net_device *peer_netdev)
3608 {
3609         struct mlx5e_priv *peer_priv;
3610
3611         peer_priv = netdev_priv(peer_netdev);
3612
3613         return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) &&
3614                 mlx5e_eswitch_vf_rep(priv->netdev) &&
3615                 mlx5e_eswitch_vf_rep(peer_netdev) &&
3616                 same_hw_devs(priv, peer_priv));
3617 }
3618
3619 bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
3620 {
3621         return refcount_inc_not_zero(&e->refcnt);
3622 }
3623
3624 static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
3625 {
3626         return refcount_inc_not_zero(&e->refcnt);
3627 }
3628
3629 static struct mlx5e_encap_entry *
3630 mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key,
3631                 uintptr_t hash_key)
3632 {
3633         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3634         struct mlx5e_encap_entry *e;
3635         struct encap_key e_key;
3636
3637         hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
3638                                    encap_hlist, hash_key) {
3639                 e_key.ip_tun_key = &e->tun_info->key;
3640                 e_key.tc_tunnel = e->tunnel;
3641                 if (!cmp_encap_info(&e_key, key) &&
3642                     mlx5e_encap_take(e))
3643                         return e;
3644         }
3645
3646         return NULL;
3647 }
3648
3649 static struct mlx5e_decap_entry *
3650 mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
3651                 uintptr_t hash_key)
3652 {
3653         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3654         struct mlx5e_decap_key r_key;
3655         struct mlx5e_decap_entry *e;
3656
3657         hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
3658                                    hlist, hash_key) {
3659                 r_key = e->key;
3660                 if (!cmp_decap_info(&r_key, key) &&
3661                     mlx5e_decap_take(e))
3662                         return e;
3663         }
3664         return NULL;
3665 }
3666
3667 static struct ip_tunnel_info *dup_tun_info(const struct ip_tunnel_info *tun_info)
3668 {
3669         size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
3670
3671         return kmemdup(tun_info, tun_size, GFP_KERNEL);
3672 }
3673
3674 static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
3675                                       struct mlx5e_tc_flow *flow,
3676                                       int out_index,
3677                                       struct mlx5e_encap_entry *e,
3678                                       struct netlink_ext_ack *extack)
3679 {
3680         int i;
3681
3682         for (i = 0; i < out_index; i++) {
3683                 if (flow->encaps[i].e != e)
3684                         continue;
3685                 NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
3686                 netdev_err(priv->netdev, "can't duplicate encap action\n");
3687                 return true;
3688         }
3689
3690         return false;
3691 }
3692
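/* Find or create the encap entry for one tunnel output of the flow. The entry
 * is looked up under encap_tbl_lock; a new entry releases the lock while the
 * route/neighbour lookup builds the tunnel header, so concurrent users wait
 * on res_ready instead.
 */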
3693 static int mlx5e_attach_encap(struct mlx5e_priv *priv,
3694                               struct mlx5e_tc_flow *flow,
3695                               struct net_device *mirred_dev,
3696                               int out_index,
3697                               struct netlink_ext_ack *extack,
3698                               struct net_device **encap_dev,
3699                               bool *encap_valid)
3700 {
3701         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3702         struct mlx5e_tc_flow_parse_attr *parse_attr;
3703         struct mlx5_flow_attr *attr = flow->attr;
3704         const struct ip_tunnel_info *tun_info;
3705         struct encap_key key;
3706         struct mlx5e_encap_entry *e;
3707         unsigned short family;
3708         uintptr_t hash_key;
3709         int err = 0;
3710
3711         parse_attr = attr->parse_attr;
3712         tun_info = parse_attr->tun_info[out_index];
3713         family = ip_tunnel_info_af(tun_info);
3714         key.ip_tun_key = &tun_info->key;
3715         key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
3716         if (!key.tc_tunnel) {
3717                 NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
3718                 return -EOPNOTSUPP;
3719         }
3720
3721         hash_key = hash_encap_info(&key);
3722
3723         mutex_lock(&esw->offloads.encap_tbl_lock);
3724         e = mlx5e_encap_get(priv, &key, hash_key);
3725
3726         /* an existing encap entry must be verified as valid before reuse */
3727         if (e) {
3728                 /* Check that entry was not already attached to this flow */
3729                 if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
3730                         err = -EOPNOTSUPP;
3731                         goto out_err;
3732                 }
3733
3734                 mutex_unlock(&esw->offloads.encap_tbl_lock);
3735                 wait_for_completion(&e->res_ready);
3736
3737                 /* Protect against concurrent neigh update. */
3738                 mutex_lock(&esw->offloads.encap_tbl_lock);
3739                 if (e->compl_result < 0) {
3740                         err = -EREMOTEIO;
3741                         goto out_err;
3742                 }
3743                 goto attach_flow;
3744         }
3745
3746         e = kzalloc(sizeof(*e), GFP_KERNEL);
3747         if (!e) {
3748                 err = -ENOMEM;
3749                 goto out_err;
3750         }
3751
3752         refcount_set(&e->refcnt, 1);
3753         init_completion(&e->res_ready);
3754
3755         tun_info = dup_tun_info(tun_info);
3756         if (!tun_info) {
3757                 err = -ENOMEM;
3758                 goto out_err_init;
3759         }
3760         e->tun_info = tun_info;
3761         err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
3762         if (err)
3763                 goto out_err_init;
3764
3765         INIT_LIST_HEAD(&e->flows);
3766         hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
3767         mutex_unlock(&esw->offloads.encap_tbl_lock);
3768
3769         if (family == AF_INET)
3770                 err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
3771         else if (family == AF_INET6)
3772                 err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
3773
3774         /* Protect against concurrent neigh update. */
3775         mutex_lock(&esw->offloads.encap_tbl_lock);
3776         complete_all(&e->res_ready);
3777         if (err) {
3778                 e->compl_result = err;
3779                 goto out_err;
3780         }
3781         e->compl_result = 1;
3782
3783 attach_flow:
3784         flow->encaps[out_index].e = e;
3785         list_add(&flow->encaps[out_index].list, &e->flows);
3786         flow->encaps[out_index].index = out_index;
3787         *encap_dev = e->out_dev;
3788         if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
3789                 attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
3790                 attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
3791                 *encap_valid = true;
3792         } else {
3793                 *encap_valid = false;
3794         }
3795         mutex_unlock(&esw->offloads.encap_tbl_lock);
3796
3797         return err;
3798
3799 out_err:
3800         mutex_unlock(&esw->offloads.encap_tbl_lock);
3801         if (e)
3802                 mlx5e_encap_put(priv, e);
3803         return err;
3804
3805 out_err_init:
3806         mutex_unlock(&esw->offloads.encap_tbl_lock);
3807         kfree(tun_info);
3808         kfree(e);
3809         return err;
3810 }
3811
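/* Find or create the decap (L3-to-L2 reformat) entry matching the rebuilt
 * Ethernet header and attach it to the flow.
 */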
3812 static int mlx5e_attach_decap(struct mlx5e_priv *priv,
3813                               struct mlx5e_tc_flow *flow,
3814                               struct netlink_ext_ack *extack)
3815 {
3816         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3817         struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
3818         struct mlx5e_tc_flow_parse_attr *parse_attr;
3819         struct mlx5e_decap_entry *d;
3820         struct mlx5e_decap_key key;
3821         uintptr_t hash_key;
3822         int err = 0;
3823
3824         parse_attr = flow->attr->parse_attr;
3825         if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
3826                 NL_SET_ERR_MSG_MOD(extack,
3827                                    "encap header larger than max supported");
3828                 return -EOPNOTSUPP;
3829         }
3830
3831         key.key = parse_attr->eth;
3832         hash_key = hash_decap_info(&key);
3833         mutex_lock(&esw->offloads.decap_tbl_lock);
3834         d = mlx5e_decap_get(priv, &key, hash_key);
3835         if (d) {
3836                 mutex_unlock(&esw->offloads.decap_tbl_lock);
3837                 wait_for_completion(&d->res_ready);
3838                 mutex_lock(&esw->offloads.decap_tbl_lock);
3839                 if (d->compl_result) {
3840                         err = -EREMOTEIO;
3841                         goto out_free;
3842                 }
3843                 goto found;
3844         }
3845
3846         d = kzalloc(sizeof(*d), GFP_KERNEL);
3847         if (!d) {
3848                 err = -ENOMEM;
3849                 goto out_err;
3850         }
3851
3852         d->key = key;
3853         refcount_set(&d->refcnt, 1);
3854         init_completion(&d->res_ready);
3855         INIT_LIST_HEAD(&d->flows);
3856         hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
3857         mutex_unlock(&esw->offloads.decap_tbl_lock);
3858
3859         d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
3860                                                      MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2,
3861                                                      sizeof(parse_attr->eth),
3862                                                      &parse_attr->eth,
3863                                                      MLX5_FLOW_NAMESPACE_FDB);
3864         if (IS_ERR(d->pkt_reformat)) {
3865                 err = PTR_ERR(d->pkt_reformat);
3866                 d->compl_result = err;
3867         }
3868         mutex_lock(&esw->offloads.decap_tbl_lock);
3869         complete_all(&d->res_ready);
3870         if (err)
3871                 goto out_free;
3872
3873 found:
3874         flow->decap_reformat = d;
3875         attr->decap_pkt_reformat = d->pkt_reformat;
3876         list_add(&flow->l3_to_l2_reformat, &d->flows);
3877         mutex_unlock(&esw->offloads.decap_tbl_lock);
3878         return 0;
3879
3880 out_free:
3881         mutex_unlock(&esw->offloads.decap_tbl_lock);
3882         mlx5e_decap_put(priv, d);
3883         return err;
3884
3885 out_err:
3886         mutex_unlock(&esw->offloads.decap_tbl_lock);
3887         return err;
3888 }
3889
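/* Translate one VLAN push/pop action into eswitch attributes, using the
 * *_2 action bits for the second VLAN level when the HW supports it.
 */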
3890 static int parse_tc_vlan_action(struct mlx5e_priv *priv,
3891                                 const struct flow_action_entry *act,
3892                                 struct mlx5_esw_flow_attr *attr,
3893                                 u32 *action)
3894 {
3895         u8 vlan_idx = attr->total_vlan;
3896
3897         if (vlan_idx >= MLX5_FS_VLAN_DEPTH)
3898                 return -EOPNOTSUPP;
3899
3900         switch (act->id) {
3901         case FLOW_ACTION_VLAN_POP:
3902                 if (vlan_idx) {
3903                         if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
3904                                                                  MLX5_FS_VLAN_DEPTH))
3905                                 return -EOPNOTSUPP;
3906
3907                         *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2;
3908                 } else {
3909                         *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
3910                 }
3911                 break;
3912         case FLOW_ACTION_VLAN_PUSH:
3913                 attr->vlan_vid[vlan_idx] = act->vlan.vid;
3914                 attr->vlan_prio[vlan_idx] = act->vlan.prio;
3915                 attr->vlan_proto[vlan_idx] = act->vlan.proto;
3916                 if (!attr->vlan_proto[vlan_idx])
3917                         attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q);
3918
3919                 if (vlan_idx) {
3920                         if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
3921                                                                  MLX5_FS_VLAN_DEPTH))
3922                                 return -EOPNOTSUPP;
3923
3924                         *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
3925                 } else {
3926                         if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) &&
3927                             (act->vlan.proto != htons(ETH_P_8021Q) ||
3928                              act->vlan.prio))
3929                                 return -EOPNOTSUPP;
3930
3931                         *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
3932                 }
3933                 break;
3934         default:
3935                 return -EINVAL;
3936         }
3937
3938         attr->total_vlan = vlan_idx + 1;
3939
3940         return 0;
3941 }
3942
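/* Map a LAG/bond output device to the underlying uplink or active slave
 * representor; returns NULL when the resolved device cannot be offloaded.
 */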
3943 static struct net_device *get_fdb_out_dev(struct net_device *uplink_dev,
3944                                           struct net_device *out_dev)
3945 {
3946         struct net_device *fdb_out_dev = out_dev;
3947         struct net_device *uplink_upper;
3948
3949         rcu_read_lock();
3950         uplink_upper = netdev_master_upper_dev_get_rcu(uplink_dev);
3951         if (uplink_upper && netif_is_lag_master(uplink_upper) &&
3952             uplink_upper == out_dev) {
3953                 fdb_out_dev = uplink_dev;
3954         } else if (netif_is_lag_master(out_dev)) {
3955                 fdb_out_dev = bond_option_active_slave_get_rcu(netdev_priv(out_dev));
3956                 if (fdb_out_dev &&
3957                     (!mlx5e_eswitch_rep(fdb_out_dev) ||
3958                      !netdev_port_same_parent_id(fdb_out_dev, uplink_dev)))
3959                         fdb_out_dev = NULL;
3960         }
3961         rcu_read_unlock();
3962         return fdb_out_dev;
3963 }
3964
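/* Push a VLAN for the given vlan device and recurse through its lower
 * devices until a non-vlan device is reached; *out_dev is updated to that
 * device.
 */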
3965 static int add_vlan_push_action(struct mlx5e_priv *priv,
3966                                 struct mlx5_flow_attr *attr,
3967                                 struct net_device **out_dev,
3968                                 u32 *action)
3969 {
3970         struct net_device *vlan_dev = *out_dev;
3971         struct flow_action_entry vlan_act = {
3972                 .id = FLOW_ACTION_VLAN_PUSH,
3973                 .vlan.vid = vlan_dev_vlan_id(vlan_dev),
3974                 .vlan.proto = vlan_dev_vlan_proto(vlan_dev),
3975                 .vlan.prio = 0,
3976         };
3977         int err;
3978
3979         err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, action);
3980         if (err)
3981                 return err;
3982
3983         *out_dev = dev_get_by_index_rcu(dev_net(vlan_dev),
3984                                         dev_get_iflink(vlan_dev));
3985         if (is_vlan_dev(*out_dev))
3986                 err = add_vlan_push_action(priv, attr, out_dev, action);
3987
3988         return err;
3989 }
3990
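/* Pop one VLAN per stacking level between the filter device and this netdev. */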
3991 static int add_vlan_pop_action(struct mlx5e_priv *priv,
3992                                struct mlx5_flow_attr *attr,
3993                                u32 *action)
3994 {
3995         struct flow_action_entry vlan_act = {
3996                 .id = FLOW_ACTION_VLAN_POP,
3997         };
3998         int nest_level, err = 0;
3999
4000         nest_level = attr->parse_attr->filter_dev->lower_level -
4001                                                 priv->netdev->lower_level;
4002         while (nest_level--) {
4003                 err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, action);
4004                 if (err)
4005                         return err;
4006         }
4007
4008         return err;
4009 }
4010
4011 static bool same_hw_reps(struct mlx5e_priv *priv,
4012                          struct net_device *peer_netdev)
4013 {
4014         struct mlx5e_priv *peer_priv;
4015
4016         peer_priv = netdev_priv(peer_netdev);
4017
4018         return mlx5e_eswitch_rep(priv->netdev) &&
4019                mlx5e_eswitch_rep(peer_netdev) &&
4020                same_hw_devs(priv, peer_priv);
4021 }
4022
4023 static bool is_lag_dev(struct mlx5e_priv *priv,
4024                        struct net_device *peer_netdev)
4025 {
4026         return ((mlx5_lag_is_sriov(priv->mdev) ||
4027                  mlx5_lag_is_multipath(priv->mdev)) &&
4028                  same_hw_reps(priv, peer_netdev));
4029 }
4030
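/* A forward destination is valid if it is a VF rep on the merged eswitch,
 * a LAG device backed by the same HW, or an eswitch rep on the same port
 * as this priv.
 */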
4031 bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
4032                                     struct net_device *out_dev)
4033 {
4034         if (is_merged_eswitch_vfs(priv, out_dev))
4035                 return true;
4036
4037         if (is_lag_dev(priv, out_dev))
4038                 return true;
4039
4040         return mlx5e_eswitch_rep(out_dev) &&
4041                same_port_devs(priv, netdev_priv(out_dev));
4042 }
4043
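/* Reject rules that list the same output device more than once; such a
 * duplicate cannot be offloaded and is reported back via extack.
 */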
4044 static bool is_duplicated_output_device(struct net_device *dev,
4045                                         struct net_device *out_dev,
4046                                         int *ifindexes, int if_count,
4047                                         struct netlink_ext_ack *extack)
4048 {
4049         int i;
4050
4051         for (i = 0; i < if_count; i++) {
4052                 if (ifindexes[i] == out_dev->ifindex) {
4053                         NL_SET_ERR_MSG_MOD(extack,
4054                                            "can't duplicate output to same device");
4055                         netdev_err(dev, "can't duplicate output to same device: %s\n",
4056                                    out_dev->name);
4057                         return true;
4058                 }
4059         }
4060
4061         return false;
4062 }
4063
4064 static int verify_uplink_forwarding(struct mlx5e_priv *priv,
4065                                     struct mlx5e_tc_flow *flow,
4066                                     struct net_device *out_dev,
4067                                     struct netlink_ext_ack *extack)
4068 {
4069         struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
4070         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4071         struct mlx5e_rep_priv *rep_priv;
4072
4073         /* Forwarding non-encapsulated traffic between
4074          * uplink ports is allowed only if the
4075          * termination_table_raw_traffic cap is set.
4076          *
4077          * The input vport was stored in attr->in_rep.
4078          * In the LAG case, *priv* is the private data of
4079          * the uplink, which may not be the input vport.
4080          */
4081         rep_priv = mlx5e_rep_to_rep_priv(attr->in_rep);
4082
4083         if (!(mlx5e_eswitch_uplink_rep(rep_priv->netdev) &&
4084               mlx5e_eswitch_uplink_rep(out_dev)))
4085                 return 0;
4086
4087         if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev,
4088                                         termination_table_raw_traffic)) {
4089                 NL_SET_ERR_MSG_MOD(extack,
4090                                    "devices are both uplink, can't offload forwarding");
4091                 pr_err("devices %s %s are both uplink, can't offload forwarding\n",
4092                        priv->netdev->name, out_dev->name);
4093                 return -EOPNOTSUPP;
4094         } else if (out_dev != rep_priv->netdev) {
4095                 NL_SET_ERR_MSG_MOD(extack,
4096                                    "devices are not the same uplink, can't offload forwarding");
4097                 pr_err("devices %s %s are both uplink but not the same, can't offload forwarding\n",
4098                        priv->netdev->name, out_dev->name);
4099                 return -EOPNOTSUPP;
4100         }
4101         return 0;
4102 }
4103
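/* Parse the flower actions of an eswitch (FDB) rule: translate each
 * flow_action entry into MLX5_FLOW_CONTEXT_ACTION_* bits and eswitch
 * attributes (forward destinations, vlan push/pop, header rewrites,
 * encap/decap, goto chain, CT), then validate that the resulting
 * combination can be offloaded.
 */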
4104 static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
4105                                 struct flow_action *flow_action,
4106                                 struct mlx5e_tc_flow *flow,
4107                                 struct netlink_ext_ack *extack,
4108                                 struct net_device *filter_dev)
4109 {
4110         struct pedit_headers_action hdrs[2] = {};
4111         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4112         struct mlx5e_tc_flow_parse_attr *parse_attr;
4113         struct mlx5e_rep_priv *rpriv = priv->ppriv;
4114         const struct ip_tunnel_info *info = NULL;
4115         struct mlx5_flow_attr *attr = flow->attr;
4116         int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS];
4117         bool ft_flow = mlx5e_is_ft_flow(flow);
4118         const struct flow_action_entry *act;
4119         struct mlx5_esw_flow_attr *esw_attr;
4120         bool encap = false, decap = false;
4121         u32 action = attr->action;
4122         int err, i, if_count = 0;
4123         bool mpls_push = false;
4124
4125         if (!flow_action_has_entries(flow_action))
4126                 return -EINVAL;
4127
4128         if (!flow_action_hw_stats_check(flow_action, extack,
4129                                         FLOW_ACTION_HW_STATS_DELAYED_BIT))
4130                 return -EOPNOTSUPP;
4131
4132         esw_attr = attr->esw_attr;
4133         parse_attr = attr->parse_attr;
4134
4135         flow_action_for_each(i, act, flow_action) {
4136                 switch (act->id) {
4137                 case FLOW_ACTION_DROP:
4138                         action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
4139                                   MLX5_FLOW_CONTEXT_ACTION_COUNT;
4140                         break;
4141                 case FLOW_ACTION_TRAP:
4142                         if (!flow_offload_has_one_action(flow_action)) {
4143                                 NL_SET_ERR_MSG_MOD(extack,
4144                                                    "action trap is supported as a sole action only");
4145                                 return -EOPNOTSUPP;
4146                         }
4147                         action |= (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
4148                                    MLX5_FLOW_CONTEXT_ACTION_COUNT);
4149                         attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
4150                         break;
4151                 case FLOW_ACTION_MPLS_PUSH:
4152                         if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
4153                                                         reformat_l2_to_l3_tunnel) ||
4154                             act->mpls_push.proto != htons(ETH_P_MPLS_UC)) {
4155                                 NL_SET_ERR_MSG_MOD(extack,
4156                                                    "mpls push is supported only for mpls_uc protocol");
4157                                 return -EOPNOTSUPP;
4158                         }
4159                         mpls_push = true;
4160                         break;
4161                 case FLOW_ACTION_MPLS_POP:
4162                         /* we only support mpls pop if it is the first action
4163                          * and the filter net device is bareudp. Subsequent
4164                          * actions can be pedit and the last can be mirred
4165                          * egress redirect.
4166                          */
4167                         if (i) {
4168                                 NL_SET_ERR_MSG_MOD(extack,
4169                                                    "mpls pop supported only as first action");
4170                                 return -EOPNOTSUPP;
4171                         }
4172                         if (!netif_is_bareudp(filter_dev)) {
4173                                 NL_SET_ERR_MSG_MOD(extack,
4174                                                    "mpls pop supported only on bareudp devices");
4175                                 return -EOPNOTSUPP;
4176                         }
4177
4178                         parse_attr->eth.h_proto = act->mpls_pop.proto;
4179                         action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
4180                         flow_flag_set(flow, L3_TO_L2_DECAP);
4181                         break;
4182                 case FLOW_ACTION_MANGLE:
4183                 case FLOW_ACTION_ADD:
4184                         err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_FDB,
4185                                                     parse_attr, hdrs, flow, extack);
4186                         if (err)
4187                                 return err;
4188
4189                         if (!flow_flag_test(flow, L3_TO_L2_DECAP)) {
4190                                 action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
4191                                 esw_attr->split_count = esw_attr->out_count;
4192                         }
4193                         break;
4194                 case FLOW_ACTION_CSUM:
4195                         if (csum_offload_supported(priv, action,
4196                                                    act->csum_flags, extack))
4197                                 break;
4198
4199                         return -EOPNOTSUPP;
4200                 case FLOW_ACTION_REDIRECT:
4201                 case FLOW_ACTION_MIRRED: {
4202                         struct mlx5e_priv *out_priv;
4203                         struct net_device *out_dev;
4204
4205                         out_dev = act->dev;
4206                         if (!out_dev) {
4207                                 /* out_dev is NULL when filters with
4208                                 /* out_dev is NULL when filters with a
4209                                  * non-existing mirred device are replayed to
4210                                  */
4211                                 return -EINVAL;
4212                         }
4213
4214                         if (mpls_push && !netif_is_bareudp(out_dev)) {
4215                                 NL_SET_ERR_MSG_MOD(extack,
4216                                                    "mpls is supported only through a bareudp device");
4217                                 return -EOPNOTSUPP;
4218                         }
4219
4220                         if (ft_flow && out_dev == priv->netdev) {
4221                                 /* Ignore forward-to-self rules generated
4222                                  * by adding both mlx5 devs to the flow table
4223                                  * block on a normal nft offload setup.
4224                                  */
4225                                 return -EOPNOTSUPP;
4226                         }
4227
4228                         if (esw_attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) {
4229                                 NL_SET_ERR_MSG_MOD(extack,
4230                                                    "can't support more output ports, can't offload forwarding");
4231                                 netdev_warn(priv->netdev,
4232                                             "can't support more than %d output ports, can't offload forwarding\n",
4233                                             esw_attr->out_count);
4234                                 return -EOPNOTSUPP;
4235                         }
4236
4237                         action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
4238                                   MLX5_FLOW_CONTEXT_ACTION_COUNT;
4239                         if (encap) {
4240                                 parse_attr->mirred_ifindex[esw_attr->out_count] =
4241                                         out_dev->ifindex;
4242                                 parse_attr->tun_info[esw_attr->out_count] = dup_tun_info(info);
4243                                 if (!parse_attr->tun_info[esw_attr->out_count])
4244                                         return -ENOMEM;
4245                                 encap = false;
4246                                 esw_attr->dests[esw_attr->out_count].flags |=
4247                                         MLX5_ESW_DEST_ENCAP;
4248                                 esw_attr->out_count++;
4249                                 /* attr->dests[].rep is resolved when we
4250                                  * handle encap
4251                                  */
4252                         } else if (netdev_port_same_parent_id(priv->netdev, out_dev)) {
4253                                 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4254                                 struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
4255
4256                                 if (is_duplicated_output_device(priv->netdev,
4257                                                                 out_dev,
4258                                                                 ifindexes,
4259                                                                 if_count,
4260                                                                 extack))
4261                                         return -EOPNOTSUPP;
4262
4263                                 ifindexes[if_count] = out_dev->ifindex;
4264                                 if_count++;
4265
4266                                 out_dev = get_fdb_out_dev(uplink_dev, out_dev);
4267                                 if (!out_dev)
4268                                         return -ENODEV;
4269
4270                                 if (is_vlan_dev(out_dev)) {
4271                                         err = add_vlan_push_action(priv, attr,
4272                                                                    &out_dev,
4273                                                                    &action);
4274                                         if (err)
4275                                                 return err;
4276                                 }
4277
4278                                 if (is_vlan_dev(parse_attr->filter_dev)) {
4279                                         err = add_vlan_pop_action(priv, attr,
4280                                                                   &action);
4281                                         if (err)
4282                                                 return err;
4283                                 }
4284
4285                                 err = verify_uplink_forwarding(priv, flow, out_dev, extack);
4286                                 if (err)
4287                                         return err;
4288
4289                                 if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) {
4290                                         NL_SET_ERR_MSG_MOD(extack,
4291                                                            "devices are not on same switch HW, can't offload forwarding");
4292                                         return -EOPNOTSUPP;
4293                                 }
4294
4295                                 out_priv = netdev_priv(out_dev);
4296                                 rpriv = out_priv->ppriv;
4297                                 esw_attr->dests[esw_attr->out_count].rep = rpriv->rep;
4298                                 esw_attr->dests[esw_attr->out_count].mdev = out_priv->mdev;
4299                                 esw_attr->out_count++;
4300                         } else if (parse_attr->filter_dev != priv->netdev) {
4301                                 /* All mlx5 devices are called to configure
4302                                  * high level device filters. Therefore, the
4303                                  * *attempt* to install a filter on an invalid
4304                                  * eswitch should not trigger an explicit error.
4305                                  */
4306                                 return -EINVAL;
4307                         } else {
4308                                 NL_SET_ERR_MSG_MOD(extack,
4309                                                    "devices are not on same switch HW, can't offload forwarding");
4310                                 netdev_warn(priv->netdev,
4311                                             "devices %s %s not on same switch HW, can't offload forwarding\n",
4312                                             priv->netdev->name,
4313                                             out_dev->name);
4314                                 return -EINVAL;
4315                         }
4316                         }
4317                         break;
4318                 case FLOW_ACTION_TUNNEL_ENCAP:
4319                         info = act->tunnel;
4320                         if (info)
4321                                 encap = true;
4322                         else
4323                                 return -EOPNOTSUPP;
4324
4325                         break;
4326                 case FLOW_ACTION_VLAN_PUSH:
4327                 case FLOW_ACTION_VLAN_POP:
4328                         if (act->id == FLOW_ACTION_VLAN_PUSH &&
4329                             (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)) {
4330                                 /* Replace vlan pop+push with vlan modify */
4331                                 action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
4332                                 err = add_vlan_rewrite_action(priv,
4333                                                               MLX5_FLOW_NAMESPACE_FDB,
4334                                                               act, parse_attr, hdrs,
4335                                                               &action, extack);
4336                         } else {
4337                                 err = parse_tc_vlan_action(priv, act, esw_attr, &action);
4338                         }
4339                         if (err)
4340                                 return err;
4341
4342                         esw_attr->split_count = esw_attr->out_count;
4343                         break;
4344                 case FLOW_ACTION_VLAN_MANGLE:
4345                         err = add_vlan_rewrite_action(priv,
4346                                                       MLX5_FLOW_NAMESPACE_FDB,
4347                                                       act, parse_attr, hdrs,
4348                                                       &action, extack);
4349                         if (err)
4350                                 return err;
4351
4352                         esw_attr->split_count = esw_attr->out_count;
4353                         break;
4354                 case FLOW_ACTION_TUNNEL_DECAP:
4355                         decap = true;
4356                         break;
4357                 case FLOW_ACTION_GOTO:
4358                         err = validate_goto_chain(priv, flow, act, action,
4359                                                   extack);
4360                         if (err)
4361                                 return err;
4362
4363                         action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
4364                         attr->dest_chain = act->chain_index;
4365                         break;
4366                 case FLOW_ACTION_CT:
4367                         err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr, act, extack);
4368                         if (err)
4369                                 return err;
4370
4371                         flow_flag_set(flow, CT);
4372                         break;
4373                 default:
4374                         NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
4375                         return -EOPNOTSUPP;
4376                 }
4377         }
4378
4379         if (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
4380             action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) {
4381                 /* For prio tag mode, replace vlan pop with a vlan prio
4382                  * tag rewrite.
4383                  */
4384                 action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
4385                 err = add_vlan_prio_tag_rewrite_action(priv, parse_attr, hdrs,
4386                                                        &action, extack);
4387                 if (err)
4388                         return err;
4389         }
4390
4391         if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits ||
4392             hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) {
4393                 err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_FDB,
4394                                             parse_attr, hdrs, &action, extack);
4395                 if (err)
4396                         return err;
4397                 /* In case all pedit actions are skipped, remove the MOD_HDR
4398                  * flag. We might have set split_count either by pedit or
4399                  * pop/push. If there is no pop/push either, reset it too.
4400                  */
4401                 if (parse_attr->mod_hdr_acts.num_actions == 0) {
4402                         action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
4403                         dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
4404                         if (!((action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
4405                               (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)))
4406                                 esw_attr->split_count = 0;
4407                 }
4408         }
4409
4410         attr->action = action;
4411         if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack))
4412                 return -EOPNOTSUPP;
4413
4414         if (attr->dest_chain) {
4415                 if (decap) {
4416                         /* It could be supported if we created a mapping for
4417                          * the tunnel device only (without the tunnel), and set
4418                          * this tunnel id on this decap flow.
4419                          *
4420                          * On restore (miss), we would just set this saved tunnel
4421                          * device.
4422                          */
4423
4424                         NL_SET_ERR_MSG(extack,
4425                                        "Decap with goto isn't supported");
4426                         netdev_warn(priv->netdev,
4427                                     "Decap with goto isn't supported");
4428                         return -EOPNOTSUPP;
4429                 }
4430
4431                 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
4432                         NL_SET_ERR_MSG_MOD(extack,
4433                                            "Mirroring goto chain rules isn't supported");
4434                         return -EOPNOTSUPP;
4435                 }
4436                 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
4437         }
4438
4439         if (!(attr->action &
4440               (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
4441                 NL_SET_ERR_MSG_MOD(extack,
4442                                    "Rule must have at least one forward/drop action");
4443                 return -EOPNOTSUPP;
4444         }
4445
4446         if (esw_attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
4447                 NL_SET_ERR_MSG_MOD(extack,
4448                                    "current firmware doesn't support split rule for port mirroring");
4449                 netdev_warn_once(priv->netdev, "current firmware doesn't support split rule for port mirroring\n");
4450                 return -EOPNOTSUPP;
4451         }
4452
4453         return 0;
4454 }
4455
4456 static void get_flags(int flags, unsigned long *flow_flags)
4457 {
4458         unsigned long __flow_flags = 0;
4459
4460         if (flags & MLX5_TC_FLAG(INGRESS))
4461                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS);
4462         if (flags & MLX5_TC_FLAG(EGRESS))
4463                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS);
4464
4465         if (flags & MLX5_TC_FLAG(ESW_OFFLOAD))
4466                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
4467         if (flags & MLX5_TC_FLAG(NIC_OFFLOAD))
4468                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
4469         if (flags & MLX5_TC_FLAG(FT_OFFLOAD))
4470                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_FT);
4471
4472         *flow_flags = __flow_flags;
4473 }
4474
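/* TC flows are hashed by their flower cookie; the same parameters are used
 * for both the NIC table and the uplink representor table.
 */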
4475 static const struct rhashtable_params tc_ht_params = {
4476         .head_offset = offsetof(struct mlx5e_tc_flow, node),
4477         .key_offset = offsetof(struct mlx5e_tc_flow, cookie),
4478         .key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie),
4479         .automatic_shrinking = true,
4480 };
4481
4482 static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv,
4483                                     unsigned long flags)
4484 {
4485         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4486         struct mlx5e_rep_priv *uplink_rpriv;
4487
4488         if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) {
4489                 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
4490                 return &uplink_rpriv->uplink_priv.tc_ht;
4491         } else /* NIC offload */
4492                 return &priv->fs.tc.ht;
4493 }
4494
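/* A duplicate rule on the peer eswitch is needed only when the eswitches
 * are devcom-paired (SRIOV LAG or multipath) and the rule either ingresses
 * on a non-uplink representor or performs packet reformat (encap).
 */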
4495 static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
4496 {
4497         struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
4498         struct mlx5_flow_attr *attr = flow->attr;
4499         bool is_rep_ingress = esw_attr->in_rep->vport != MLX5_VPORT_UPLINK &&
4500                 flow_flag_test(flow, INGRESS);
4501         bool act_is_encap = !!(attr->action &
4502                                MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT);
4503         bool esw_paired = mlx5_devcom_is_paired(esw_attr->in_mdev->priv.devcom,
4504                                                 MLX5_DEVCOM_ESW_OFFLOADS);
4505
4506         if (!esw_paired)
4507                 return false;
4508
4509         if ((mlx5_lag_is_sriov(esw_attr->in_mdev) ||
4510              mlx5_lag_is_multipath(esw_attr->in_mdev)) &&
4511             (is_rep_ingress || act_is_encap))
4512                 return true;
4513
4514         return false;
4515 }
4516
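/* Allocate a flow attr with trailing room for the namespace-specific part:
 * mlx5_esw_flow_attr for FDB rules, mlx5_nic_flow_attr otherwise. For FDB
 * rules the extension is reached through attr->esw_attr.
 */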
4517 struct mlx5_flow_attr *
4518 mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type)
4519 {
4520         u32 ex_attr_size = (type == MLX5_FLOW_NAMESPACE_FDB)  ?
4521                                 sizeof(struct mlx5_esw_flow_attr) :
4522                                 sizeof(struct mlx5_nic_flow_attr);
4523         struct mlx5_flow_attr *attr;
4524
4525         return kzalloc(sizeof(*attr) + ex_attr_size, GFP_KERNEL);
4526 }
4527
4528 static int
4529 mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
4530                  struct flow_cls_offload *f, unsigned long flow_flags,
4531                  struct mlx5e_tc_flow_parse_attr **__parse_attr,
4532                  struct mlx5e_tc_flow **__flow)
4533 {
4534         struct mlx5e_tc_flow_parse_attr *parse_attr;
4535         struct mlx5_flow_attr *attr;
4536         struct mlx5e_tc_flow *flow;
4537         int err = -ENOMEM;
4538         int out_index;
4539
4540         flow = kzalloc(sizeof(*flow), GFP_KERNEL);
4541         parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
4542         if (!parse_attr || !flow)
4543                 goto err_free;
4544
4545         flow->flags = flow_flags;
4546         flow->cookie = f->cookie;
4547         flow->priv = priv;
4548
4549         attr = mlx5_alloc_flow_attr(get_flow_name_space(flow));
4550         if (!attr)
4551                 goto err_free;
4552
4553         flow->attr = attr;
4554
4555         for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
4556                 INIT_LIST_HEAD(&flow->encaps[out_index].list);
4557         INIT_LIST_HEAD(&flow->hairpin);
4558         INIT_LIST_HEAD(&flow->l3_to_l2_reformat);
4559         refcount_set(&flow->refcnt, 1);
4560         init_completion(&flow->init_done);
4561
4562         *__flow = flow;
4563         *__parse_attr = parse_attr;
4564
4565         return 0;
4566
4567 err_free:
4568         kfree(flow);
4569         kvfree(parse_attr);
4570         return err;
4571 }
4572
4573 static void
4574 mlx5e_flow_attr_init(struct mlx5_flow_attr *attr,
4575                      struct mlx5e_tc_flow_parse_attr *parse_attr,
4576                      struct flow_cls_offload *f)
4577 {
4578         attr->parse_attr = parse_attr;
4579         attr->chain = f->common.chain_index;
4580         attr->prio = f->common.prio;
4581 }
4582
4583 static void
4584 mlx5e_flow_esw_attr_init(struct mlx5_flow_attr *attr,
4585                          struct mlx5e_priv *priv,
4586                          struct mlx5e_tc_flow_parse_attr *parse_attr,
4587                          struct flow_cls_offload *f,
4588                          struct mlx5_eswitch_rep *in_rep,
4589                          struct mlx5_core_dev *in_mdev)
4590 {
4591         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4592         struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
4593
4594         mlx5e_flow_attr_init(attr, parse_attr, f);
4595
4596         esw_attr->in_rep = in_rep;
4597         esw_attr->in_mdev = in_mdev;
4598
4599         if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) ==
4600             MLX5_COUNTER_SOURCE_ESWITCH)
4601                 esw_attr->counter_dev = in_mdev;
4602         else
4603                 esw_attr->counter_dev = priv->mdev;
4604 }
4605
4606 static struct mlx5e_tc_flow *
4607 __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
4608                      struct flow_cls_offload *f,
4609                      unsigned long flow_flags,
4610                      struct net_device *filter_dev,
4611                      struct mlx5_eswitch_rep *in_rep,
4612                      struct mlx5_core_dev *in_mdev)
4613 {
4614         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
4615         struct netlink_ext_ack *extack = f->common.extack;
4616         struct mlx5e_tc_flow_parse_attr *parse_attr;
4617         struct mlx5e_tc_flow *flow;
4618         int attr_size, err;
4619
4620         flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
4621         attr_size  = sizeof(struct mlx5_esw_flow_attr);
4622         err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
4623                                &parse_attr, &flow);
4624         if (err)
4625                 goto out;
4626
4627         parse_attr->filter_dev = filter_dev;
4628         mlx5e_flow_esw_attr_init(flow->attr,
4629                                  priv, parse_attr,
4630                                  f, in_rep, in_mdev);
4631
4632         err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
4633                                f, filter_dev);
4634         if (err)
4635                 goto err_free;
4636
4637         /* actions validation depends on parsing the ct matches first */
4638         err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
4639                                    &flow->attr->ct_attr, extack);
4640         if (err)
4641                 goto err_free;
4642
4643         err = parse_tc_fdb_actions(priv, &rule->action, flow, extack, filter_dev);
4644         if (err)
4645                 goto err_free;
4646
4647         err = mlx5e_tc_add_fdb_flow(priv, flow, extack);
4648         complete_all(&flow->init_done);
4649         if (err) {
4650                 if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev)))
4651                         goto err_free;
4652
4653                 add_unready_flow(flow);
4654         }
4655
4656         return flow;
4657
4658 err_free:
4659         dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
4660         mlx5e_flow_put(priv, flow);
4661 out:
4662         return ERR_PTR(err);
4663 }
4664
4665 static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
4666                                       struct mlx5e_tc_flow *flow,
4667                                       unsigned long flow_flags)
4668 {
4669         struct mlx5e_priv *priv = flow->priv, *peer_priv;
4670         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
4671         struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
4672         struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
4673         struct mlx5e_tc_flow_parse_attr *parse_attr;
4674         struct mlx5e_rep_priv *peer_urpriv;
4675         struct mlx5e_tc_flow *peer_flow;
4676         struct mlx5_core_dev *in_mdev;
4677         int err = 0;
4678
4679         peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4680         if (!peer_esw)
4681                 return -ENODEV;
4682
4683         peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH);
4684         peer_priv = netdev_priv(peer_urpriv->netdev);
4685
4686         /* in_mdev is assigned the mdev of the port the packet originated from.
4687          * So packets redirected to the uplink use the same mdev as the
4688          * original flow, and packets redirected from the uplink use the
4689          * peer mdev.
4690          */
4691         if (attr->in_rep->vport == MLX5_VPORT_UPLINK)
4692                 in_mdev = peer_priv->mdev;
4693         else
4694                 in_mdev = priv->mdev;
4695
4696         parse_attr = flow->attr->parse_attr;
4697         peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags,
4698                                          parse_attr->filter_dev,
4699                                          attr->in_rep, in_mdev);
4700         if (IS_ERR(peer_flow)) {
4701                 err = PTR_ERR(peer_flow);
4702                 goto out;
4703         }
4704
4705         flow->peer_flow = peer_flow;
4706         flow_flag_set(flow, DUP);
4707         mutex_lock(&esw->offloads.peer_mutex);
4708         list_add_tail(&flow->peer, &esw->offloads.peer_flows);
4709         mutex_unlock(&esw->offloads.peer_mutex);
4710
4711 out:
4712         mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4713         return err;
4714 }
4715
4716 static int
4717 mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
4718                    struct flow_cls_offload *f,
4719                    unsigned long flow_flags,
4720                    struct net_device *filter_dev,
4721                    struct mlx5e_tc_flow **__flow)
4722 {
4723         struct mlx5e_rep_priv *rpriv = priv->ppriv;
4724         struct mlx5_eswitch_rep *in_rep = rpriv->rep;
4725         struct mlx5_core_dev *in_mdev = priv->mdev;
4726         struct mlx5e_tc_flow *flow;
4727         int err;
4728
4729         flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep,
4730                                     in_mdev);
4731         if (IS_ERR(flow))
4732                 return PTR_ERR(flow);
4733
4734         if (is_peer_flow_needed(flow)) {
4735                 err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags);
4736                 if (err) {
4737                         mlx5e_tc_del_fdb_flow(priv, flow);
4738                         goto out;
4739                 }
4740         }
4741
4742         *__flow = flow;
4743
4744         return 0;
4745
4746 out:
4747         return err;
4748 }
4749
4750 static int
4751 mlx5e_add_nic_flow(struct mlx5e_priv *priv,
4752                    struct flow_cls_offload *f,
4753                    unsigned long flow_flags,
4754                    struct net_device *filter_dev,
4755                    struct mlx5e_tc_flow **__flow)
4756 {
4757         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
4758         struct netlink_ext_ack *extack = f->common.extack;
4759         struct mlx5e_tc_flow_parse_attr *parse_attr;
4760         struct mlx5e_tc_flow *flow;
4761         int attr_size, err;
4762
4763         if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
4764                 if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common))
4765                         return -EOPNOTSUPP;
4766         } else if (!tc_can_offload_extack(priv->netdev, f->common.extack)) {
4767                 return -EOPNOTSUPP;
4768         }
4769
4770         flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
4771         attr_size  = sizeof(struct mlx5_nic_flow_attr);
4772         err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
4773                                &parse_attr, &flow);
4774         if (err)
4775                 goto out;
4776
4777         parse_attr->filter_dev = filter_dev;
4778         mlx5e_flow_attr_init(flow->attr, parse_attr, f);
4779
4780         err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
4781                                f, filter_dev);
4782         if (err)
4783                 goto err_free;
4784
4785         err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
4786                                    &flow->attr->ct_attr, extack);
4787         if (err)
4788                 goto err_free;
4789
4790         err = parse_tc_nic_actions(priv, &rule->action, parse_attr, flow, extack);
4791         if (err)
4792                 goto err_free;
4793
4794         err = mlx5e_tc_add_nic_flow(priv, parse_attr, flow, extack);
4795         if (err)
4796                 goto err_free;
4797
4798         flow_flag_set(flow, OFFLOADED);
4799         *__flow = flow;
4800
4801         return 0;
4802
4803 err_free:
4804         dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
4805         mlx5e_flow_put(priv, flow);
4806 out:
4807         return err;
4808 }
4809
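/* Entry point for adding a flower rule: when the eswitch is in switchdev
 * (offloads) mode the rule is installed in the FDB tables, otherwise it is
 * handled as a NIC mode rule.
 */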
4810 static int
4811 mlx5e_tc_add_flow(struct mlx5e_priv *priv,
4812                   struct flow_cls_offload *f,
4813                   unsigned long flags,
4814                   struct net_device *filter_dev,
4815                   struct mlx5e_tc_flow **flow)
4816 {
4817         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4818         unsigned long flow_flags;
4819         int err;
4820
4821         get_flags(flags, &flow_flags);
4822
4823         if (!tc_can_offload_extack(priv->netdev, f->common.extack))
4824                 return -EOPNOTSUPP;
4825
4826         if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
4827                 err = mlx5e_add_fdb_flow(priv, f, flow_flags,
4828                                          filter_dev, flow);
4829         else
4830                 err = mlx5e_add_nic_flow(priv, f, flow_flags,
4831                                          filter_dev, flow);
4832
4833         return err;
4834 }
4835
4836 static bool is_flow_rule_duplicate_allowed(struct net_device *dev,
4837                                            struct mlx5e_rep_priv *rpriv)
4838 {
4839         /* An offloaded flow rule may be duplicated on a non-uplink representor
4840          * that shares a tc block with other slaves of a lag device. rpriv can be
4841          * NULL if this function is called from NIC mode.
4842          */
4843         return netif_is_lag_port(dev) && rpriv && rpriv->rep->vport != MLX5_VPORT_UPLINK;
4844 }
4845
4846 int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
4847                            struct flow_cls_offload *f, unsigned long flags)
4848 {
4849         struct netlink_ext_ack *extack = f->common.extack;
4850         struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4851         struct mlx5e_rep_priv *rpriv = priv->ppriv;
4852         struct mlx5e_tc_flow *flow;
4853         int err = 0;
4854
4855         rcu_read_lock();
4856         flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
4857         if (flow) {
4858                 /* Same flow rule offloaded to non-uplink representor sharing tc block,
4859                  * just return 0.
4860                  */
4861                 if (is_flow_rule_duplicate_allowed(dev, rpriv) && flow->orig_dev != dev)
4862                         goto rcu_unlock;
4863
4864                 NL_SET_ERR_MSG_MOD(extack,
4865                                    "flow cookie already exists, ignoring");
4866                 netdev_warn_once(priv->netdev,
4867                                  "flow cookie %lx already exists, ignoring\n",
4868                                  f->cookie);
4869                 err = -EEXIST;
4870                 goto rcu_unlock;
4871         }
4872 rcu_unlock:
4873         rcu_read_unlock();
4874         if (flow)
4875                 goto out;
4876
4877         trace_mlx5e_configure_flower(f);
4878         err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow);
4879         if (err)
4880                 goto out;
4881
4882         /* Flow rule offloaded to non-uplink representor sharing tc block,
4883          * set the flow's owner dev.
4884          */
4885         if (is_flow_rule_duplicate_allowed(dev, rpriv))
4886                 flow->orig_dev = dev;
4887
4888         err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params);
4889         if (err)
4890                 goto err_free;
4891
4892         return 0;
4893
4894 err_free:
4895         mlx5e_flow_put(priv, flow);
4896 out:
4897         return err;
4898 }
4899
4900 static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags)
4901 {
4902         bool dir_ingress = !!(flags & MLX5_TC_FLAG(INGRESS));
4903         bool dir_egress = !!(flags & MLX5_TC_FLAG(EGRESS));
4904
4905         return flow_flag_test(flow, INGRESS) == dir_ingress &&
4906                 flow_flag_test(flow, EGRESS) == dir_egress;
4907 }
4908
4909 int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
4910                         struct flow_cls_offload *f, unsigned long flags)
4911 {
4912         struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4913         struct mlx5e_tc_flow *flow;
4914         int err;
4915
4916         rcu_read_lock();
4917         flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
4918         if (!flow || !same_flow_direction(flow, flags)) {
4919                 err = -EINVAL;
4920                 goto errout;
4921         }
4922
4923         /* Only delete the flow if it doesn't have MLX5E_TC_FLOW_DELETED flag
4924          * set.
4925          */
4926         if (flow_flag_test_and_set(flow, DELETED)) {
4927                 err = -EINVAL;
4928                 goto errout;
4929         }
4930         rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params);
4931         rcu_read_unlock();
4932
4933         trace_mlx5e_delete_flower(f);
4934         mlx5e_flow_put(priv, flow);
4935
4936         return 0;
4937
4938 errout:
4939         rcu_read_unlock();
4940         return err;
4941 }
4942
4943 int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
4944                        struct flow_cls_offload *f, unsigned long flags)
4945 {
4946         struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
4947         struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4948         struct mlx5_eswitch *peer_esw;
4949         struct mlx5e_tc_flow *flow;
4950         struct mlx5_fc *counter;
4951         u64 lastuse = 0;
4952         u64 packets = 0;
4953         u64 bytes = 0;
4954         int err = 0;
4955
4956         rcu_read_lock();
4957         flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie,
4958                                                 tc_ht_params));
4959         rcu_read_unlock();
4960         if (IS_ERR(flow))
4961                 return PTR_ERR(flow);
4962
4963         if (!same_flow_direction(flow, flags)) {
4964                 err = -EINVAL;
4965                 goto errout;
4966         }
4967
4968         if (mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, CT)) {
4969                 counter = mlx5e_tc_get_counter(flow);
4970                 if (!counter)
4971                         goto errout;
4972
4973                 mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
4974         }
4975
4976         /* Under multipath it's possible for one rule to be currently
4977          * un-offloaded while the other rule is offloaded.
4978          */
4979         peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4980         if (!peer_esw)
4981                 goto out;
4982
4983         if (flow_flag_test(flow, DUP) &&
4984             flow_flag_test(flow->peer_flow, OFFLOADED)) {
4985                 u64 bytes2;
4986                 u64 packets2;
4987                 u64 lastuse2;
4988
4989                 counter = mlx5e_tc_get_counter(flow->peer_flow);
4990                 if (!counter)
4991                         goto no_peer_counter;
4992                 mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2);
4993
4994                 bytes += bytes2;
4995                 packets += packets2;
4996                 lastuse = max_t(u64, lastuse, lastuse2);
4997         }
4998
4999 no_peer_counter:
5000         mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
5001 out:
5002         flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
5003                           FLOW_ACTION_HW_STATS_DELAYED);
5004         trace_mlx5e_stats_flower(f);
5005 errout:
5006         mlx5e_flow_put(priv, flow);
5007         return err;
5008 }
5009
5010 static int apply_police_params(struct mlx5e_priv *priv, u32 rate,
5011                                struct netlink_ext_ack *extack)
5012 {
5013         struct mlx5e_rep_priv *rpriv = priv->ppriv;
5014         struct mlx5_eswitch *esw;
5015         u16 vport_num;
5016         u32 rate_mbps;
5017         int err;
5018
5019         vport_num = rpriv->rep->vport;
5020         if (vport_num >= MLX5_VPORT_ECPF) {
5021                 NL_SET_ERR_MSG_MOD(extack,
5022                                    "Ingress rate limit is supported only for Eswitch ports connected to VFs");
5023                 return -EOPNOTSUPP;
5024         }
5025
5026         esw = priv->mdev->priv.eswitch;
5027         /* rate is given in bytes/sec.
5028          * First convert to bits/sec and then round to the nearest mbit/sec,
5029          * where mbit means a million bits.
5030          * Moreover, if rate is non-zero we choose to configure a minimum of
5031          * 1 mbit/sec.
5032          */
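        /* For example, rate = 187500 bytes/sec is 1,500,000 bits/sec and
         * rounds to (1,500,000 + 500,000) / 1,000,000 = 2 mbit/sec, while
         * any non-zero rate below 62500 bytes/sec rounds to 0 and is then
         * clamped to the 1 mbit/sec minimum.
         */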
5033         rate_mbps = rate ? max_t(u32, (rate * 8 + 500000) / 1000000, 1) : 0;
5034         err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps);
5035         if (err)
5036                 NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware");
5037
5038         return err;
5039 }
5040
5041 static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
5042                                         struct flow_action *flow_action,
5043                                         struct netlink_ext_ack *extack)
5044 {
5045         struct mlx5e_rep_priv *rpriv = priv->ppriv;
5046         const struct flow_action_entry *act;
5047         int err;
5048         int i;
5049
5050         if (!flow_action_has_entries(flow_action)) {
5051                 NL_SET_ERR_MSG_MOD(extack, "matchall called with no action");
5052                 return -EINVAL;
5053         }
5054
5055         if (!flow_offload_has_one_action(flow_action)) {
5056                 NL_SET_ERR_MSG_MOD(extack, "matchall policing support only a single action");
5057                 return -EOPNOTSUPP;
5058         }
5059
5060         if (!flow_action_basic_hw_stats_check(flow_action, extack))
5061                 return -EOPNOTSUPP;
5062
5063         flow_action_for_each(i, act, flow_action) {
5064                 switch (act->id) {
5065                 case FLOW_ACTION_POLICE:
5066                         err = apply_police_params(priv, act->police.rate_bytes_ps, extack);
5067                         if (err)
5068                                 return err;
5069
5070                         rpriv->prev_vf_vport_stats = priv->stats.vf_vport;
5071                         break;
5072                 default:
5073                         NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall");
5074                         return -EOPNOTSUPP;
5075                 }
5076         }
5077
5078         return 0;
5079 }
5080
5081 int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv,
5082                                 struct tc_cls_matchall_offload *ma)
5083 {
5084         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
5085         struct netlink_ext_ack *extack = ma->common.extack;
5086
5087         if (!mlx5_esw_qos_enabled(esw)) {
5088                 NL_SET_ERR_MSG_MOD(extack, "QoS is not supported on this device");
5089                 return -EOPNOTSUPP;
5090         }
5091
5092         if (ma->common.prio != 1) {
5093                 NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported");
5094                 return -EINVAL;
5095         }
5096
5097         return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack);
5098 }
5099
5100 int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv,
5101                              struct tc_cls_matchall_offload *ma)
5102 {
5103         struct netlink_ext_ack *extack = ma->common.extack;
5104
5105         return apply_police_params(priv, 0, extack);
5106 }
5107
5108 void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv,
5109                              struct tc_cls_matchall_offload *ma)
5110 {
5111         struct mlx5e_rep_priv *rpriv = priv->ppriv;
5112         struct rtnl_link_stats64 cur_stats;
5113         u64 dbytes;
5114         u64 dpkts;
5115
5116         cur_stats = priv->stats.vf_vport;
5117         dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets;
5118         dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes;
5119         rpriv->prev_vf_vport_stats = cur_stats;
5120         flow_stats_update(&ma->stats, dbytes, dpkts, 0, jiffies,
5121                           FLOW_ACTION_HW_STATS_DELAYED);
5122 }
5123
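/* A peer mlx5 netdev is being unregistered: walk the hairpin table and mark
 * every hairpin pair whose peer vhca_id matches the dying device as
 * peer_gone.
 */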
5124 static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
5125                                               struct mlx5e_priv *peer_priv)
5126 {
5127         struct mlx5_core_dev *peer_mdev = peer_priv->mdev;
5128         struct mlx5e_hairpin_entry *hpe, *tmp;
5129         LIST_HEAD(init_wait_list);
5130         u16 peer_vhca_id;
5131         int bkt;
5132
5133         if (!same_hw_devs(priv, peer_priv))
5134                 return;
5135
5136         peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
5137
5138         mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
5139         hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist)
5140                 if (refcount_inc_not_zero(&hpe->refcnt))
5141                         list_add(&hpe->dead_peer_wait_list, &init_wait_list);
5142         mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
5143
5144         list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) {
5145                 wait_for_completion(&hpe->res_ready);
5146                 if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id)
5147                         hpe->hp->pair->peer_gone = true;
5148
5149                 mlx5e_hairpin_put(priv, hpe);
5150         }
5151 }
5152
5153 static int mlx5e_tc_netdev_event(struct notifier_block *this,
5154                                  unsigned long event, void *ptr)
5155 {
5156         struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
5157         struct mlx5e_flow_steering *fs;
5158         struct mlx5e_priv *peer_priv;
5159         struct mlx5e_tc_table *tc;
5160         struct mlx5e_priv *priv;
5161
5162         if (ndev->netdev_ops != &mlx5e_netdev_ops ||
5163             event != NETDEV_UNREGISTER ||
5164             ndev->reg_state == NETREG_REGISTERED)
5165                 return NOTIFY_DONE;
5166
5167         tc = container_of(this, struct mlx5e_tc_table, netdevice_nb);
5168         fs = container_of(tc, struct mlx5e_flow_steering, tc);
5169         priv = container_of(fs, struct mlx5e_priv, fs);
5170         peer_priv = netdev_priv(ndev);
5171         if (priv == peer_priv ||
5172             !(priv->netdev->features & NETIF_F_HW_TC))
5173                 return NOTIFY_DONE;
5174
5175         mlx5e_tc_hairpin_update_dead_peer(priv, peer_priv);
5176
5177         return NOTIFY_DONE;
5178 }
5179
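/* Size the NIC mode TC table: each group holds at most as many rules as
 * there are flow counters (bounded by MLX5E_TC_TABLE_MAX_GROUP_SIZE), the
 * table holds MLX5E_TC_TABLE_NUM_GROUPS such groups, and the total is
 * capped by the device's log_max_ft_size.
 */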
5180 static int mlx5e_tc_nic_get_ft_size(struct mlx5_core_dev *dev)
5181 {
5182         int tc_grp_size, tc_tbl_size;
5183         u32 max_flow_counter;
5184
5185         max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
5186                             MLX5_CAP_GEN(dev, max_flow_counter_15_0);
5187
5188         tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE);
5189
5190         tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS,
5191                             BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size)));
5192
5193         return tc_tbl_size;
5194 }
5195
5196 int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
5197 {
5198         struct mlx5e_tc_table *tc = &priv->fs.tc;
5199         struct mlx5_core_dev *dev = priv->mdev;
5200         struct mlx5_chains_attr attr = {};
5201         int err;
5202
5203         mlx5e_mod_hdr_tbl_init(&tc->mod_hdr);
5204         mutex_init(&tc->t_lock);
5205         mutex_init(&tc->hairpin_tbl_lock);
5206         hash_init(tc->hairpin_tbl);
5207
5208         err = rhashtable_init(&tc->ht, &tc_ht_params);
5209         if (err)
5210                 return err;
5211
5212         if (MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
5213                 attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED |
5214                         MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
5215                 attr.max_restore_tag = MLX5E_TC_TABLE_CHAIN_TAG_MASK;
5216         }
5217         attr.ns = MLX5_FLOW_NAMESPACE_KERNEL;
5218         attr.max_ft_sz = mlx5e_tc_nic_get_ft_size(dev);
5219         attr.max_grp_num = MLX5E_TC_TABLE_NUM_GROUPS;
5220         attr.default_ft = priv->fs.vlan.ft.t;
5221
5222         tc->chains = mlx5_chains_create(dev, &attr);
5223         if (IS_ERR(tc->chains)) {
5224                 err = PTR_ERR(tc->chains);
5225                 goto err_chains;
5226         }
5227
5228         tc->ct = mlx5_tc_ct_init(priv, tc->chains, &priv->fs.tc.mod_hdr,
5229                                  MLX5_FLOW_NAMESPACE_KERNEL);
5230         if (IS_ERR(tc->ct)) {
5231                 err = PTR_ERR(tc->ct);
5232                 goto err_ct;
5233         }
5234
5235         tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
5236         err = register_netdevice_notifier_dev_net(priv->netdev,
5237                                                   &tc->netdevice_nb,
5238                                                   &tc->netdevice_nn);
5239         if (err) {
5240                 tc->netdevice_nb.notifier_call = NULL;
5241                 mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n");
5242                 goto err_reg;
5243         }
5244
5245         return 0;
5246
5247 err_reg:
5248         mlx5_tc_ct_clean(tc->ct);
5249 err_ct:
5250         mlx5_chains_destroy(tc->chains);
5251 err_chains:
5252         rhashtable_destroy(&tc->ht);
5253         return err;
5254 }
5255
5256 static void _mlx5e_tc_del_flow(void *ptr, void *arg)
5257 {
5258         struct mlx5e_tc_flow *flow = ptr;
5259         struct mlx5e_priv *priv = flow->priv;
5260
5261         mlx5e_tc_del_flow(priv, flow);
5262         kfree(flow);
5263 }
5264
5265 void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
5266 {
5267         struct mlx5e_tc_table *tc = &priv->fs.tc;
5268
5269         if (tc->netdevice_nb.notifier_call)
5270                 unregister_netdevice_notifier_dev_net(priv->netdev,
5271                                                       &tc->netdevice_nb,
5272                                                       &tc->netdevice_nn);
5273
5274         mlx5e_mod_hdr_tbl_destroy(&tc->mod_hdr);
5275         mutex_destroy(&tc->hairpin_tbl_lock);
5276
5277         rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL);
5278
5279         if (!IS_ERR_OR_NULL(tc->t)) {
5280                 mlx5_chains_put_table(tc->chains, 0, 1, MLX5E_TC_FT_LEVEL);
5281                 tc->t = NULL;
5282         }
5283         mutex_destroy(&tc->t_lock);
5284
5285         mlx5_tc_ct_clean(tc->ct);
5286         mlx5_chains_destroy(tc->chains);
5287 }
5288
5289 int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
5290 {
5291         const size_t sz_enc_opts = sizeof(struct tunnel_match_enc_opts);
5292         struct mlx5_rep_uplink_priv *uplink_priv;
5293         struct mlx5e_rep_priv *rpriv;
5294         struct mapping_ctx *mapping;
5295         struct mlx5_eswitch *esw;
5296         struct mlx5e_priv *priv;
5297         int err = 0;
5298
5299         uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht);
5300         rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
5301         priv = netdev_priv(rpriv->netdev);
5302         esw = priv->mdev->priv.eswitch;
5303
5304         uplink_priv->ct_priv = mlx5_tc_ct_init(netdev_priv(priv->netdev),
5305                                                esw_chains(esw),
5306                                                &esw->offloads.mod_hdr,
5307                                                MLX5_FLOW_NAMESPACE_FDB);
5308         if (IS_ERR(uplink_priv->ct_priv)) {
                err = PTR_ERR(uplink_priv->ct_priv);
5309                 goto err_ct;
        }
5310
	mapping = mapping_create(sizeof(struct tunnel_match_key),
				 TUNNEL_INFO_BITS_MASK, true);
	if (IS_ERR(mapping)) {
		err = PTR_ERR(mapping);
		goto err_tun_mapping;
	}
	uplink_priv->tunnel_mapping = mapping;

	mapping = mapping_create(sz_enc_opts, ENC_OPTS_BITS_MASK, true);
	if (IS_ERR(mapping)) {
		err = PTR_ERR(mapping);
		goto err_enc_opts_mapping;
	}
	uplink_priv->tunnel_enc_opts_mapping = mapping;

	err = rhashtable_init(tc_ht, &tc_ht_params);
	if (err)
		goto err_ht_init;

	return err;

err_ht_init:
	mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
err_enc_opts_mapping:
	mapping_destroy(uplink_priv->tunnel_mapping);
err_tun_mapping:
	mlx5_tc_ct_clean(uplink_priv->ct_priv);
err_ct:
	netdev_warn(priv->netdev,
		    "Failed to initialize tc (eswitch), err: %d\n", err);
	return err;
}

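/* Tear down the eswitch tc state created by mlx5e_tc_esw_init(), deleting any
 * remaining offloaded flows first.
 */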
void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
{
	struct mlx5_rep_uplink_priv *uplink_priv;

	rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);

	uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht);

	mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
	mapping_destroy(uplink_priv->tunnel_mapping);

	mlx5_tc_ct_clean(uplink_priv->ct_priv);
}

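/* Number of flows currently installed in the tc rhashtable selected by
 * @flags.
 */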
int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
{
	struct rhashtable *tc_ht = get_tc_ht(priv, flags);

	return atomic_read(&tc_ht->nelems);
}

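/* Walk the eswitch peer_flows list and delete the peer FDB rule of each
 * flow.
 */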
void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
{
	struct mlx5e_tc_flow *flow, *tmp;

	list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer)
		__mlx5e_tc_del_fdb_peer_flow(flow);
}

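/* Work item that retries FDB offload for flows parked on the uplink
 * representor's unready list; flows that now offload successfully are taken
 * off the list.
 */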
void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
{
	struct mlx5_rep_uplink_priv *rpriv =
		container_of(work, struct mlx5_rep_uplink_priv,
			     reoffload_flows_work);
	struct mlx5e_tc_flow *flow, *tmp;

	mutex_lock(&rpriv->unready_flows_lock);
	list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) {
		if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL))
			unready_flow_del(flow);
	}
	mutex_unlock(&rpriv->unready_flows_lock);
}

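/* Dispatch a flower classifier command (replace/destroy/stats) to the
 * corresponding mlx5e handler.
 */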
static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
				     struct flow_cls_offload *cls_flower,
				     unsigned long flags)
{
	switch (cls_flower->command) {
	case FLOW_CLS_REPLACE:
		return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
					      flags);
	case FLOW_CLS_DESTROY:
		return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
					   flags);
	case FLOW_CLS_STATS:
		return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
					  flags);
	default:
		return -EOPNOTSUPP;
	}
}

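/* tc block callback for NIC-mode offload: only flower rules are handled, and
 * they are flagged as ingress NIC offloads.
 *
 * Illustrative sketch (not taken from this file): a driver would typically
 * hand this callback to the flow block infrastructure from its ndo_setup_tc,
 * roughly as:
 *
 *	static LIST_HEAD(mlx5e_block_cb_list);
 *
 *	case TC_SETUP_BLOCK:
 *		return flow_block_cb_setup_simple(type_data,
 *						  &mlx5e_block_cb_list,
 *						  mlx5e_setup_tc_block_cb,
 *						  priv, priv, true);
 */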
int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
			    void *cb_priv)
{
	unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(NIC_OFFLOAD);
	struct mlx5e_priv *priv = cb_priv;

	switch (type) {
	case TC_SETUP_CLSFLOWER:
		return mlx5e_setup_tc_cls_flower(priv, type_data, flags);
	default:
		return -EOPNOTSUPP;
	}
}

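/* On receive, recover the tc chain id that the hardware encoded in the CQE
 * flow-table metadata and store it in a tc skb extension, then let CT restore
 * the conntrack state for the flow.  Returns false on failure (unknown chain
 * tag, missing skb extension or failed CT restore).
 */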
bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe,
			 struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
	u32 chain = 0, chain_tag, reg_b, zone_restore_id;
	struct mlx5e_priv *priv = netdev_priv(skb->dev);
	struct mlx5e_tc_table *tc = &priv->fs.tc;
	struct tc_skb_ext *tc_skb_ext;
	int err;

	reg_b = be32_to_cpu(cqe->ft_metadata);

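	/* The low bits of the metadata register carry the chain tag; the zone
	 * restore id used for conntrack sits in the bits above it.
	 */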
	chain_tag = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK;

	err = mlx5_get_chain_for_tag(nic_chains(priv), chain_tag, &chain);
	if (err) {
		netdev_dbg(priv->netdev,
			   "Couldn't find chain for chain tag: %d, err: %d\n",
			   chain_tag, err);
		return false;
	}

	if (chain) {
		tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT);
		if (WARN_ON(!tc_skb_ext))
			return false;

		tc_skb_ext->chain = chain;

		zone_restore_id = (reg_b >> REG_MAPPING_SHIFT(NIC_ZONE_RESTORE_TO_REG)) &
				  ZONE_RESTORE_MAX;

		if (!mlx5e_tc_ct_restore_flow(tc->ct, skb,
					      zone_restore_id))
			return false;
	}
#endif /* CONFIG_NET_TC_SKB_EXT */

	return true;
}