/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <net/flow_dissector.h>
#include <net/flow_offload.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/device.h>
#include <linux/rhashtable.h>
#include <linux/refcount.h>
#include <linux/completion.h>
#include <net/arp.h>
#include <net/ipv6_stubs.h>
#include <net/bareudp.h>
#include <net/bonding.h>
#include "en.h"
#include "en/tc/post_act.h"
#include "en_rep.h"
#include "en/rep/tc.h"
#include "en/rep/neigh.h"
#include "en_tc.h"
#include "eswitch.h"
#include "fs_core.h"
#include "en/port.h"
#include "en/tc_tun.h"
#include "en/mapping.h"
#include "en/tc_ct.h"
#include "en/mod_hdr.h"
#include "en/tc_tun_encap.h"
#include "en/tc/sample.h"
#include "en/tc/act/act.h"
#include "en/tc/post_meter.h"
#include "lib/devcom.h"
#include "lib/geneve.h"
#include "lib/fs_chains.h"
#include "diag/en_tc_tracepoint.h"
#include <asm/div64.h>
#include "lag/lag.h"
#include "lag/mp.h"

#define MLX5E_TC_TABLE_NUM_GROUPS 4
#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18)

struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
	[CHAIN_TO_REG] = {
		.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
		.moffset = 0,
		.mlen = 16,
	},
	[VPORT_TO_REG] = {
		.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
		.moffset = 16,
		.mlen = 16,
	},
	[TUNNEL_TO_REG] = {
		.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,
		.moffset = 8,
		.mlen = ESW_TUN_OPTS_BITS + ESW_TUN_ID_BITS,
		.soffset = MLX5_BYTE_OFF(fte_match_param,
					 misc_parameters_2.metadata_reg_c_1),
	},
	[ZONE_TO_REG] = zone_to_reg_ct,
	[ZONE_RESTORE_TO_REG] = zone_restore_to_reg_ct,
	[CTSTATE_TO_REG] = ctstate_to_reg_ct,
	[MARK_TO_REG] = mark_to_reg_ct,
	[LABELS_TO_REG] = labels_to_reg_ct,
	[FTEID_TO_REG] = fteid_to_reg_ct,
	/* For NIC rules we store the restore metadata directly
	 * into reg_b that is passed to SW since we don't
	 * jump between steering domains.
	 */
	[NIC_CHAIN_TO_REG] = {
		.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B,
		.moffset = 0,
		.mlen = 16,
	},
	[NIC_ZONE_RESTORE_TO_REG] = nic_zone_restore_to_reg_ct,
	[PACKET_COLOR_TO_REG] = packet_color_to_reg,
};
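
/* Illustrative layout (derived from the CHAIN_TO_REG and VPORT_TO_REG
 * mappings above, not an additional mapping): reg_c_0 carries the chain id in
 * bits 0..15 and the vport id in bits 16..31, so e.g. chain 5 restored for
 * vport 3 would be seen by software as reg_c_0 == (3 << 16) | 5.
 */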

/* To avoid a false lock dependency warning, set the tc_ht lock class to be
 * different from the lock class of the ht used internally: when deleting the
 * last flow from a group and then deleting the group, we get into
 * del_sw_flow_group(), which calls rhashtable_destroy() on fg->ftes_hash;
 * that takes an ht->mutex, but it is a different mutex from the tc_ht one
 * here.
 */
static struct lock_class_key tc_ht_lock_key;

static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow);
static void free_flow_post_acts(struct mlx5e_tc_flow *flow);

void
mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec,
			    enum mlx5e_tc_attr_to_reg type,
			    u32 val,
			    u32 mask)
{
	void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval;
	int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
	int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
	int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
	u32 max_mask = GENMASK(match_len - 1, 0);
	__be32 curr_mask_be, curr_val_be;
	u32 curr_mask, curr_val;

	fmask = headers_c + soffset;
	fval = headers_v + soffset;

	memcpy(&curr_mask_be, fmask, 4);
	memcpy(&curr_val_be, fval, 4);

	curr_mask = be32_to_cpu(curr_mask_be);
	curr_val = be32_to_cpu(curr_val_be);

	/* Move to the correct offset */
	WARN_ON(mask > max_mask);
	mask <<= moffset;
	val <<= moffset;
	max_mask <<= moffset;

	/* Zero out the current val and mask bits */
	curr_mask &= ~max_mask;
	curr_val &= ~max_mask;

	/* Or in the new mask and value */
	curr_mask |= mask;
	curr_val |= val;

	/* Back to be32 and write */
	curr_mask_be = cpu_to_be32(curr_mask);
	curr_val_be = cpu_to_be32(curr_val);

	memcpy(fmask, &curr_mask_be, 4);
	memcpy(fval, &curr_val_be, 4);

	spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
}
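
/* Usage sketch (illustrative): to match packets whose restored chain id
 * equals 3, a caller would do
 *
 *	mlx5e_tc_match_to_reg_match(spec, CHAIN_TO_REG, 3, 0xFFFF);
 *
 * where 0xFFFF is the widest valid mask, since CHAIN_TO_REG maps a 16-bit
 * field.
 */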

void
mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec,
				enum mlx5e_tc_attr_to_reg type,
				u32 *val,
				u32 *mask)
{
	void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval;
	int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
	int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
	int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
	u32 max_mask = GENMASK(match_len - 1, 0);
	__be32 curr_mask_be, curr_val_be;
	u32 curr_mask, curr_val;

	fmask = headers_c + soffset;
	fval = headers_v + soffset;

	memcpy(&curr_mask_be, fmask, 4);
	memcpy(&curr_val_be, fval, 4);

	curr_mask = be32_to_cpu(curr_mask_be);
	curr_val = be32_to_cpu(curr_val_be);

	*mask = (curr_mask >> moffset) & max_mask;
	*val = (curr_val >> moffset) & max_mask;
}

int
mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev,
				     struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
				     enum mlx5_flow_namespace_type ns,
				     enum mlx5e_tc_attr_to_reg type,
				     u32 data)
{
	int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
	int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
	int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
	char *modact;
	int err;

	modact = mlx5e_mod_hdr_alloc(mdev, ns, mod_hdr_acts);
	if (IS_ERR(modact))
		return PTR_ERR(modact);

	/* Firmware has a 5-bit length field, where 0 means 32 bits */
	if (mlen == 32)
		mlen = 0;

	MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
	MLX5_SET(set_action_in, modact, field, mfield);
	MLX5_SET(set_action_in, modact, offset, moffset);
	MLX5_SET(set_action_in, modact, length, mlen);
	MLX5_SET(set_action_in, modact, data, data);
	err = mod_hdr_acts->num_actions;
	mod_hdr_acts->num_actions++;

	return err;
}
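
/* The index returned above identifies the allocated set_action_in entry
 * inside mod_hdr_acts; callers that only learn the final register value at a
 * later stage can pass that index back to
 * mlx5e_tc_match_to_reg_mod_hdr_change() (further down in this file) to
 * rewrite the data field in place.
 */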

struct mlx5e_tc_int_port_priv *
mlx5e_get_int_port_priv(struct mlx5e_priv *priv)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;

	if (is_mdev_switchdev_mode(priv->mdev)) {
		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
		uplink_priv = &uplink_rpriv->uplink_priv;

		return uplink_priv->int_port_priv;
	}

	return NULL;
}

struct mlx5e_flow_meters *
mlx5e_get_flow_meters(struct mlx5_core_dev *dev)
{
	struct mlx5_eswitch *esw = dev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct mlx5e_priv *priv;

	if (is_mdev_switchdev_mode(dev)) {
		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
		uplink_priv = &uplink_rpriv->uplink_priv;
		priv = netdev_priv(uplink_rpriv->netdev);
		if (!uplink_priv->flow_meters)
			uplink_priv->flow_meters =
				mlx5e_flow_meters_init(priv,
						       MLX5_FLOW_NAMESPACE_FDB,
						       uplink_priv->post_act);
		if (!IS_ERR(uplink_priv->flow_meters))
			return uplink_priv->flow_meters;
	}

	return NULL;
}

static struct mlx5_tc_ct_priv *
get_ct_priv(struct mlx5e_priv *priv)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;

	if (is_mdev_switchdev_mode(priv->mdev)) {
		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
		uplink_priv = &uplink_rpriv->uplink_priv;

		return uplink_priv->ct_priv;
	}

	return priv->fs.tc.ct;
}

static struct mlx5e_tc_psample *
get_sample_priv(struct mlx5e_priv *priv)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;

	if (is_mdev_switchdev_mode(priv->mdev)) {
		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
		uplink_priv = &uplink_rpriv->uplink_priv;

		return uplink_priv->tc_psample;
	}

	return NULL;
}

static struct mlx5e_post_act *
get_post_action(struct mlx5e_priv *priv)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;

	if (is_mdev_switchdev_mode(priv->mdev)) {
		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
		uplink_priv = &uplink_rpriv->uplink_priv;

		return uplink_priv->post_act;
	}

	return priv->fs.tc.post_act;
}

struct mlx5_flow_handle *
mlx5_tc_rule_insert(struct mlx5e_priv *priv,
		    struct mlx5_flow_spec *spec,
		    struct mlx5_flow_attr *attr)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (is_mdev_switchdev_mode(priv->mdev))
		return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);

	return mlx5e_add_offloaded_nic_rule(priv, spec, attr);
}

void
mlx5_tc_rule_delete(struct mlx5e_priv *priv,
		    struct mlx5_flow_handle *rule,
		    struct mlx5_flow_attr *attr)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (is_mdev_switchdev_mode(priv->mdev)) {
		mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
		return;
	}

	mlx5e_del_offloaded_nic_rule(priv, rule, attr);
}

static bool
is_flow_meter_action(struct mlx5_flow_attr *attr)
{
	return ((attr->action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) &&
		(attr->exe_aso_type == MLX5_EXE_ASO_FLOW_METER));
}

static int
mlx5e_tc_add_flow_meter(struct mlx5e_priv *priv,
			struct mlx5_flow_attr *attr)
{
	struct mlx5e_flow_meter_handle *meter;

	meter = mlx5e_tc_meter_get(priv->mdev, &attr->meter_attr.params);
	if (IS_ERR(meter)) {
		mlx5_core_err(priv->mdev, "Failed to get flow meter\n");
		return PTR_ERR(meter);
	}

	attr->meter_attr.meter = meter;
	attr->dest_ft = mlx5e_tc_meter_get_post_meter_ft(meter->flow_meters);
	attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;

	return 0;
}

struct mlx5_flow_handle *
mlx5e_tc_rule_offload(struct mlx5e_priv *priv,
		      struct mlx5_flow_spec *spec,
		      struct mlx5_flow_attr *attr)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	int err;

	if (attr->flags & MLX5_ATTR_FLAG_CT) {
		struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts =
			&attr->parse_attr->mod_hdr_acts;

		return mlx5_tc_ct_flow_offload(get_ct_priv(priv),
					       spec, attr,
					       mod_hdr_acts);
	}

	if (!is_mdev_switchdev_mode(priv->mdev))
		return mlx5e_add_offloaded_nic_rule(priv, spec, attr);

	if (attr->flags & MLX5_ATTR_FLAG_SAMPLE)
		return mlx5e_tc_sample_offload(get_sample_priv(priv), spec, attr);

	if (is_flow_meter_action(attr)) {
		err = mlx5e_tc_add_flow_meter(priv, attr);
		if (err)
			return ERR_PTR(err);
	}

	return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
}

void
mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv,
			struct mlx5_flow_handle *rule,
			struct mlx5_flow_attr *attr)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (attr->flags & MLX5_ATTR_FLAG_CT) {
		mlx5_tc_ct_delete_flow(get_ct_priv(priv), attr);
		return;
	}

	if (!is_mdev_switchdev_mode(priv->mdev)) {
		mlx5e_del_offloaded_nic_rule(priv, rule, attr);
		return;
	}

	if (attr->flags & MLX5_ATTR_FLAG_SAMPLE) {
		mlx5e_tc_sample_unoffload(get_sample_priv(priv), rule, attr);
		return;
	}

	mlx5_eswitch_del_offloaded_rule(esw, rule, attr);

	if (attr->meter_attr.meter)
		mlx5e_tc_meter_put(attr->meter_attr.meter);
}

int
mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
			  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
			  enum mlx5_flow_namespace_type ns,
			  enum mlx5e_tc_attr_to_reg type,
			  u32 data)
{
	int ret = mlx5e_tc_match_to_reg_set_and_get_id(mdev, mod_hdr_acts, ns, type, data);

	return ret < 0 ? ret : 0;
}

void mlx5e_tc_match_to_reg_mod_hdr_change(struct mlx5_core_dev *mdev,
					  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
					  enum mlx5e_tc_attr_to_reg type,
					  int act_id, u32 data)
{
	int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
	int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
	int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
	char *modact;

	modact = mlx5e_mod_hdr_get_item(mod_hdr_acts, act_id);

	/* Firmware has a 5-bit length field, where 0 means 32 bits */
	if (mlen == 32)
		mlen = 0;

	MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
	MLX5_SET(set_action_in, modact, field, mfield);
	MLX5_SET(set_action_in, modact, offset, moffset);
	MLX5_SET(set_action_in, modact, length, mlen);
	MLX5_SET(set_action_in, modact, data, data);
}

struct mlx5e_hairpin {
	struct mlx5_hairpin *pair;

	struct mlx5_core_dev *func_mdev;
	struct mlx5e_priv *func_priv;
	u32 tdn;
	struct mlx5e_tir direct_tir;

	int num_channels;
	struct mlx5e_rqt indir_rqt;
	struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS];
	struct mlx5_ttc_table *ttc;
};

struct mlx5e_hairpin_entry {
	/* a node of a hash table which keeps all the hairpin entries */
	struct hlist_node hairpin_hlist;

	/* protects flows list */
	spinlock_t flows_lock;
	/* flows sharing the same hairpin */
	struct list_head flows;
	/* hpe's that were not fully initialized when the dead peer update event
	 * function traversed them.
	 */
	struct list_head dead_peer_wait_list;

	u16 peer_vhca_id;
	u8 prio;
	struct mlx5e_hairpin *hp;
	refcount_t refcnt;
	struct completion res_ready;
};

static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
			      struct mlx5e_tc_flow *flow);

struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
{
	if (!flow || !refcount_inc_not_zero(&flow->refcnt))
		return ERR_PTR(-EINVAL);
	return flow;
}

void mlx5e_flow_put(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
{
	if (refcount_dec_and_test(&flow->refcnt)) {
		mlx5e_tc_del_flow(priv, flow);
		kfree_rcu(flow, rcu_head);
	}
}

bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
{
	return flow_flag_test(flow, ESWITCH);
}

bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow)
{
	return flow_flag_test(flow, FT);
}

bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
{
	return flow_flag_test(flow, OFFLOADED);
}

int mlx5e_get_flow_namespace(struct mlx5e_tc_flow *flow)
{
	return mlx5e_is_eswitch_flow(flow) ?
		MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL;
}

static struct mod_hdr_tbl *
get_mod_hdr_table(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	return mlx5e_get_flow_namespace(flow) == MLX5_FLOW_NAMESPACE_FDB ?
		&esw->offloads.mod_hdr :
		&priv->fs.tc.mod_hdr;
}

static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
				struct mlx5e_tc_flow *flow,
				struct mlx5e_tc_flow_parse_attr *parse_attr)
{
	struct mlx5_modify_hdr *modify_hdr;
	struct mlx5e_mod_hdr_handle *mh;

	mh = mlx5e_mod_hdr_attach(priv->mdev, get_mod_hdr_table(priv, flow),
				  mlx5e_get_flow_namespace(flow),
				  &parse_attr->mod_hdr_acts);
	if (IS_ERR(mh))
		return PTR_ERR(mh);

	modify_hdr = mlx5e_mod_hdr_get(mh);
	flow->attr->modify_hdr = modify_hdr;
	flow->mh = mh;

	return 0;
}

static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv,
				 struct mlx5e_tc_flow *flow)
{
	/* flow wasn't fully initialized */
	if (!flow->mh)
		return;

	mlx5e_mod_hdr_detach(priv->mdev, get_mod_hdr_table(priv, flow),
			     flow->mh);
	flow->mh = NULL;
}

static
struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex)
{
	struct mlx5_core_dev *mdev;
	struct net_device *netdev;
	struct mlx5e_priv *priv;

	netdev = dev_get_by_index(net, ifindex);
	if (!netdev)
		return ERR_PTR(-ENODEV);

	priv = netdev_priv(netdev);
	mdev = priv->mdev;
	dev_put(netdev);

	/* Mirred tc action holds a refcount on the ifindex net_device (see
	 * net/sched/act_mirred.c:tcf_mirred_get_dev). So, it's okay to continue using mdev
	 * after dev_put(netdev), while we're in the context of adding a tc flow.
	 *
	 * The mdev pointer corresponds to the peer/out net_device of a hairpin. It is then
	 * stored in a hairpin object, which exists until all flows that refer to it are
	 * removed.
	 *
	 * On the other hand, after a hairpin object has been created, the peer net_device may
	 * be removed/unbound while there are still some hairpin flows that are using it. This
	 * case is handled by mlx5e_tc_hairpin_update_dead_peer, which is hooked to the
	 * NETDEV_UNREGISTER event of the peer net_device.
	 */
	return mdev;
}

static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp)
{
	struct mlx5e_tir_builder *builder;
	int err;

	builder = mlx5e_tir_builder_alloc(false);
	if (!builder)
		return -ENOMEM;

	err = mlx5_core_alloc_transport_domain(hp->func_mdev, &hp->tdn);
	if (err)
		goto out;

	mlx5e_tir_builder_build_inline(builder, hp->tdn, hp->pair->rqn[0]);
	err = mlx5e_tir_init(&hp->direct_tir, builder, hp->func_mdev, false);
	if (err)
		goto create_tir_err;

out:
	mlx5e_tir_builder_free(builder);
	return err;

create_tir_err:
	mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);

	goto out;
}

static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp)
{
	mlx5e_tir_destroy(&hp->direct_tir);
	mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
}

static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
{
	struct mlx5e_priv *priv = hp->func_priv;
	struct mlx5_core_dev *mdev = priv->mdev;
	struct mlx5e_rss_params_indir *indir;
	int err;

	indir = kvmalloc(sizeof(*indir), GFP_KERNEL);
	if (!indir)
		return -ENOMEM;

	mlx5e_rss_params_indir_init_uniform(indir, hp->num_channels);
	err = mlx5e_rqt_init_indir(&hp->indir_rqt, mdev, hp->pair->rqn, hp->num_channels,
				   mlx5e_rx_res_get_current_hash(priv->rx_res).hfunc,
				   indir);

	kvfree(indir);
	return err;
}

static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
{
	struct mlx5e_priv *priv = hp->func_priv;
	struct mlx5e_rss_params_hash rss_hash;
	enum mlx5_traffic_types tt, max_tt;
	struct mlx5e_tir_builder *builder;
	int err = 0;

	builder = mlx5e_tir_builder_alloc(false);
	if (!builder)
		return -ENOMEM;

	rss_hash = mlx5e_rx_res_get_current_hash(priv->rx_res);

	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
		struct mlx5e_rss_params_traffic_type rss_tt;

		rss_tt = mlx5e_rss_get_default_tt_config(tt);

		mlx5e_tir_builder_build_rqt(builder, hp->tdn,
					    mlx5e_rqt_get_rqtn(&hp->indir_rqt),
					    false);
		mlx5e_tir_builder_build_rss(builder, &rss_hash, &rss_tt, false);

		err = mlx5e_tir_init(&hp->indir_tir[tt], builder, hp->func_mdev, false);
		if (err) {
			mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err);
			goto err_destroy_tirs;
		}

		mlx5e_tir_builder_clear(builder);
	}

out:
	mlx5e_tir_builder_free(builder);
	return err;

err_destroy_tirs:
	max_tt = tt;
	for (tt = 0; tt < max_tt; tt++)
		mlx5e_tir_destroy(&hp->indir_tir[tt]);

	goto out;
}

static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp)
{
	int tt;

	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
		mlx5e_tir_destroy(&hp->indir_tir[tt]);
}

static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp,
					 struct ttc_params *ttc_params)
{
	struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
	int tt;

	memset(ttc_params, 0, sizeof(*ttc_params));

	ttc_params->ns = mlx5_get_flow_namespace(hp->func_mdev,
						 MLX5_FLOW_NAMESPACE_KERNEL);
	for (tt = 0; tt < MLX5_NUM_TT; tt++) {
		ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
		ttc_params->dests[tt].tir_num =
			tt == MLX5_TT_ANY ?
				mlx5e_tir_get_tirn(&hp->direct_tir) :
				mlx5e_tir_get_tirn(&hp->indir_tir[tt]);
	}

	ft_attr->level = MLX5E_TC_TTC_FT_LEVEL;
	ft_attr->prio = MLX5E_TC_PRIO;
}

static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp)
{
	struct mlx5e_priv *priv = hp->func_priv;
	struct ttc_params ttc_params;
	int err;

	err = mlx5e_hairpin_create_indirect_rqt(hp);
	if (err)
		return err;

	err = mlx5e_hairpin_create_indirect_tirs(hp);
	if (err)
		goto err_create_indirect_tirs;

	mlx5e_hairpin_set_ttc_params(hp, &ttc_params);
	hp->ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params);
	if (IS_ERR(hp->ttc)) {
		err = PTR_ERR(hp->ttc);
		goto err_create_ttc_table;
	}

	netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n",
		   hp->num_channels,
		   mlx5_get_ttc_flow_table(priv->fs.ttc)->id);

	return 0;

err_create_ttc_table:
	mlx5e_hairpin_destroy_indirect_tirs(hp);
err_create_indirect_tirs:
	mlx5e_rqt_destroy(&hp->indir_rqt);

	return err;
}

static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp)
{
	mlx5_destroy_ttc_table(hp->ttc);
	mlx5e_hairpin_destroy_indirect_tirs(hp);
	mlx5e_rqt_destroy(&hp->indir_rqt);
}

static struct mlx5e_hairpin *
mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params,
		     int peer_ifindex)
{
	struct mlx5_core_dev *func_mdev, *peer_mdev;
	struct mlx5e_hairpin *hp;
	struct mlx5_hairpin *pair;
	int err;

	hp = kzalloc(sizeof(*hp), GFP_KERNEL);
	if (!hp)
		return ERR_PTR(-ENOMEM);

	func_mdev = priv->mdev;
	peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
	if (IS_ERR(peer_mdev)) {
		err = PTR_ERR(peer_mdev);
		goto create_pair_err;
	}

	pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params);
	if (IS_ERR(pair)) {
		err = PTR_ERR(pair);
		goto create_pair_err;
	}
	hp->pair = pair;
	hp->func_mdev = func_mdev;
	hp->func_priv = priv;
	hp->num_channels = params->num_channels;

	err = mlx5e_hairpin_create_transport(hp);
	if (err)
		goto create_transport_err;

	if (hp->num_channels > 1) {
		err = mlx5e_hairpin_rss_init(hp);
		if (err)
			goto rss_init_err;
	}

	return hp;

rss_init_err:
	mlx5e_hairpin_destroy_transport(hp);
create_transport_err:
	mlx5_core_hairpin_destroy(hp->pair);
create_pair_err:
	kfree(hp);
	return ERR_PTR(err);
}

static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp)
{
	if (hp->num_channels > 1)
		mlx5e_hairpin_rss_cleanup(hp);
	mlx5e_hairpin_destroy_transport(hp);
	mlx5_core_hairpin_destroy(hp->pair);
	kvfree(hp);
}

static inline u32 hash_hairpin_info(u16 peer_vhca_id, u8 prio)
{
	return (peer_vhca_id << 16 | prio);
}
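
/* Example (illustrative): peer_vhca_id 0x0002 with prio 5 yields the hash key
 * 0x00020005, i.e. hairpin pairs are shared per (peer device, priority) tuple.
 */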

static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv,
						     u16 peer_vhca_id, u8 prio)
{
	struct mlx5e_hairpin_entry *hpe;
	u32 hash_key = hash_hairpin_info(peer_vhca_id, prio);

	hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe,
			       hairpin_hlist, hash_key) {
		if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) {
			refcount_inc(&hpe->refcnt);
			return hpe;
		}
	}

	return NULL;
}

static void mlx5e_hairpin_put(struct mlx5e_priv *priv,
			      struct mlx5e_hairpin_entry *hpe)
{
	/* no more hairpin flows for us, release the hairpin pair */
	if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &priv->fs.tc.hairpin_tbl_lock))
		return;
	hash_del(&hpe->hairpin_hlist);
	mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);

	if (!IS_ERR_OR_NULL(hpe->hp)) {
		netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
			   dev_name(hpe->hp->pair->peer_mdev->device));

		mlx5e_hairpin_destroy(hpe->hp);
	}

	WARN_ON(!list_empty(&hpe->flows));
	kfree(hpe);
}

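/* VLAN PCP is a 3-bit field (0..7), so 8 serves as an out-of-range sentinel
 * meaning "no specific priority matched".
 */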
#define UNKNOWN_MATCH_PRIO 8

static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv,
				  struct mlx5_flow_spec *spec, u8 *match_prio,
				  struct netlink_ext_ack *extack)
{
	void *headers_c, *headers_v;
	u8 prio_val, prio_mask = 0;
	bool vlan_present;

#ifdef CONFIG_MLX5_CORE_EN_DCB
	if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) {
		NL_SET_ERR_MSG_MOD(extack,
				   "only PCP trust state supported for hairpin");
		return -EOPNOTSUPP;
	}
#endif
	headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
	headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);

	vlan_present = MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag);
	if (vlan_present) {
		prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
		prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
	}

	if (!vlan_present || !prio_mask) {
		prio_val = UNKNOWN_MATCH_PRIO;
	} else if (prio_mask != 0x7) {
		NL_SET_ERR_MSG_MOD(extack,
				   "masked priority match not supported for hairpin");
		return -EOPNOTSUPP;
	}

	*match_prio = prio_val;
	return 0;
}

static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
				  struct mlx5e_tc_flow *flow,
				  struct mlx5e_tc_flow_parse_attr *parse_attr,
				  struct netlink_ext_ack *extack)
{
	int peer_ifindex = parse_attr->mirred_ifindex[0];
	struct mlx5_hairpin_params params;
	struct mlx5_core_dev *peer_mdev;
	struct mlx5e_hairpin_entry *hpe;
	struct mlx5e_hairpin *hp;
	u64 link_speed64;
	u32 link_speed;
	u8 match_prio;
	u16 peer_id;
	int err;

	peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
	if (IS_ERR(peer_mdev)) {
		NL_SET_ERR_MSG_MOD(extack, "invalid ifindex of mirred device");
		return PTR_ERR(peer_mdev);
	}

	if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) {
		NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported");
		return -EOPNOTSUPP;
	}

	peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
	err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio,
				     extack);
	if (err)
		return err;

	mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
	hpe = mlx5e_hairpin_get(priv, peer_id, match_prio);
	if (hpe) {
		mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
		wait_for_completion(&hpe->res_ready);

		if (IS_ERR(hpe->hp)) {
			err = -EREMOTEIO;
			goto out_err;
		}
		goto attach_flow;
	}

	hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
	if (!hpe) {
		mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
		return -ENOMEM;
	}

	spin_lock_init(&hpe->flows_lock);
	INIT_LIST_HEAD(&hpe->flows);
	INIT_LIST_HEAD(&hpe->dead_peer_wait_list);
	hpe->peer_vhca_id = peer_id;
	hpe->prio = match_prio;
	refcount_set(&hpe->refcnt, 1);
	init_completion(&hpe->res_ready);

	hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist,
		 hash_hairpin_info(peer_id, match_prio));
	mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);

	params.log_data_size = 16;
	params.log_data_size = min_t(u8, params.log_data_size,
				     MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz));
	params.log_data_size = max_t(u8, params.log_data_size,
				     MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz));

	params.log_num_packets = params.log_data_size -
				 MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv->mdev);
	params.log_num_packets = min_t(u8, params.log_num_packets,
				       MLX5_CAP_GEN(priv->mdev, log_max_hairpin_num_packets));

	params.q_counter = priv->q_counter;
	/* set one hairpin pair per each 50Gbps share of the link */
	mlx5e_port_max_linkspeed(priv->mdev, &link_speed);
	link_speed = max_t(u32, link_speed, 50000);
	link_speed64 = link_speed;
	do_div(link_speed64, 50000);
	params.num_channels = link_speed64;
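	/* Illustrative: on a 200Gbps port link_speed64 becomes 4, i.e. four
	 * hairpin channels; links at or below 50Gbps get a single channel.
	 */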

	hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
	hpe->hp = hp;
	complete_all(&hpe->res_ready);
	if (IS_ERR(hp)) {
		err = PTR_ERR(hp);
		goto out_err;
	}

	netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n",
		   mlx5e_tir_get_tirn(&hp->direct_tir), hp->pair->rqn[0],
		   dev_name(hp->pair->peer_mdev->device),
		   hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets);

attach_flow:
	if (hpe->hp->num_channels > 1) {
		flow_flag_set(flow, HAIRPIN_RSS);
		flow->attr->nic_attr->hairpin_ft =
			mlx5_get_ttc_flow_table(hpe->hp->ttc);
	} else {
		flow->attr->nic_attr->hairpin_tirn = mlx5e_tir_get_tirn(&hpe->hp->direct_tir);
	}

	flow->hpe = hpe;
	spin_lock(&hpe->flows_lock);
	list_add(&flow->hairpin, &hpe->flows);
	spin_unlock(&hpe->flows_lock);

	return 0;

out_err:
	mlx5e_hairpin_put(priv, hpe);
	return err;
}

static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
				   struct mlx5e_tc_flow *flow)
{
	/* flow wasn't fully initialized */
	if (!flow->hpe)
		return;

	spin_lock(&flow->hpe->flows_lock);
	list_del(&flow->hairpin);
	spin_unlock(&flow->hpe->flows_lock);

	mlx5e_hairpin_put(priv, flow->hpe);
	flow->hpe = NULL;
}

struct mlx5_flow_handle *
mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv,
			     struct mlx5_flow_spec *spec,
			     struct mlx5_flow_attr *attr)
{
	struct mlx5_flow_context *flow_context = &spec->flow_context;
	struct mlx5_fs_chains *nic_chains = mlx5e_nic_chains(priv);
	struct mlx5_nic_flow_attr *nic_attr = attr->nic_attr;
	struct mlx5e_tc_table *tc = &priv->fs.tc;
	struct mlx5_flow_destination dest[2] = {};
	struct mlx5_flow_act flow_act = {
		.action = attr->action,
		.flags  = FLOW_ACT_NO_APPEND,
	};
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_table *ft;
	int dest_ix = 0;

	flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
	flow_context->flow_tag = nic_attr->flow_tag;

	if (attr->dest_ft) {
		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
		dest[dest_ix].ft = attr->dest_ft;
		dest_ix++;
	} else if (nic_attr->hairpin_ft) {
		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
		dest[dest_ix].ft = nic_attr->hairpin_ft;
		dest_ix++;
	} else if (nic_attr->hairpin_tirn) {
		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
		dest[dest_ix].tir_num = nic_attr->hairpin_tirn;
		dest_ix++;
	} else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
		if (attr->dest_chain) {
			dest[dest_ix].ft = mlx5_chains_get_table(nic_chains,
								 attr->dest_chain, 1,
								 MLX5E_TC_FT_LEVEL);
			if (IS_ERR(dest[dest_ix].ft))
				return ERR_CAST(dest[dest_ix].ft);
		} else {
			dest[dest_ix].ft = mlx5e_vlan_get_flowtable(priv->fs.vlan);
		}
		dest_ix++;
	}

	if (dest[0].type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
	    MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
		flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;

	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
		dest[dest_ix].counter_id = mlx5_fc_id(attr->counter);
		dest_ix++;
	}

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
		flow_act.modify_hdr = attr->modify_hdr;

	mutex_lock(&tc->t_lock);
	if (IS_ERR_OR_NULL(tc->t)) {
		/* Create the root table here if it doesn't exist yet */
		tc->t =
			mlx5_chains_get_table(nic_chains, 0, 1, MLX5E_TC_FT_LEVEL);

		if (IS_ERR(tc->t)) {
			mutex_unlock(&tc->t_lock);
			netdev_err(priv->netdev,
				   "Failed to create tc offload table\n");
			rule = ERR_CAST(priv->fs.tc.t);
			goto err_ft_get;
		}
	}
	mutex_unlock(&tc->t_lock);

	if (attr->chain || attr->prio)
		ft = mlx5_chains_get_table(nic_chains,
					   attr->chain, attr->prio,
					   MLX5E_TC_FT_LEVEL);
	else
		ft = attr->ft;

	if (IS_ERR(ft)) {
		rule = ERR_CAST(ft);
		goto err_ft_get;
	}

	if (attr->outer_match_level != MLX5_MATCH_NONE)
		spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;

	rule = mlx5_add_flow_rules(ft, spec,
				   &flow_act, dest, dest_ix);
	if (IS_ERR(rule))
		goto err_rule;

	return rule;

err_rule:
	if (attr->chain || attr->prio)
		mlx5_chains_put_table(nic_chains,
				      attr->chain, attr->prio,
				      MLX5E_TC_FT_LEVEL);
err_ft_get:
	if (attr->dest_chain)
		mlx5_chains_put_table(nic_chains,
				      attr->dest_chain, 1,
				      MLX5E_TC_FT_LEVEL);

	return ERR_CAST(rule);
}

static int
alloc_flow_attr_counter(struct mlx5_core_dev *counter_dev,
			struct mlx5_flow_attr *attr)
{
	struct mlx5_fc *counter;

	counter = mlx5_fc_create(counter_dev, true);
	if (IS_ERR(counter))
		return PTR_ERR(counter);

	attr->counter = counter;
	return 0;
}

static int
mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
		      struct mlx5e_tc_flow *flow,
		      struct netlink_ext_ack *extack)
{
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_flow_attr *attr = flow->attr;
	struct mlx5_core_dev *dev = priv->mdev;
	int err;

	parse_attr = attr->parse_attr;

	if (flow_flag_test(flow, HAIRPIN)) {
		err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
		if (err)
			return err;
	}

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
		err = alloc_flow_attr_counter(dev, attr);
		if (err)
			return err;
	}

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
		err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
		mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
		if (err)
			return err;
	}

	if (attr->flags & MLX5_ATTR_FLAG_CT)
		flow->rule[0] = mlx5_tc_ct_flow_offload(get_ct_priv(priv), &parse_attr->spec,
							attr, &parse_attr->mod_hdr_acts);
	else
		flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec,
							     attr);

	return PTR_ERR_OR_ZERO(flow->rule[0]);
}

void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv,
				  struct mlx5_flow_handle *rule,
				  struct mlx5_flow_attr *attr)
{
	struct mlx5_fs_chains *nic_chains = mlx5e_nic_chains(priv);

	mlx5_del_flow_rules(rule);

	if (attr->chain || attr->prio)
		mlx5_chains_put_table(nic_chains, attr->chain, attr->prio,
				      MLX5E_TC_FT_LEVEL);

	if (attr->dest_chain)
		mlx5_chains_put_table(nic_chains, attr->dest_chain, 1,
				      MLX5E_TC_FT_LEVEL);
}

static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
				  struct mlx5e_tc_flow *flow)
{
	struct mlx5_flow_attr *attr = flow->attr;
	struct mlx5e_tc_table *tc = &priv->fs.tc;

	flow_flag_clear(flow, OFFLOADED);

	if (attr->flags & MLX5_ATTR_FLAG_CT)
		mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr);
	else if (!IS_ERR_OR_NULL(flow->rule[0]))
		mlx5e_del_offloaded_nic_rule(priv, flow->rule[0], attr);

	/* Remove root table if no rules are left to avoid
	 * extra steering hops.
	 */
	mutex_lock(&priv->fs.tc.t_lock);
	if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) &&
	    !IS_ERR_OR_NULL(tc->t)) {
		mlx5_chains_put_table(mlx5e_nic_chains(priv), 0, 1, MLX5E_TC_FT_LEVEL);
		priv->fs.tc.t = NULL;
	}
	mutex_unlock(&priv->fs.tc.t_lock);

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
		mlx5e_detach_mod_hdr(priv, flow);

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
		mlx5_fc_destroy(priv->mdev, attr->counter);

	if (flow_flag_test(flow, HAIRPIN))
		mlx5e_hairpin_flow_del(priv, flow);

	free_flow_post_acts(flow);

	kvfree(attr->parse_attr);
	kfree(flow->attr);
}

struct mlx5_flow_handle *
mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
			   struct mlx5e_tc_flow *flow,
			   struct mlx5_flow_spec *spec,
			   struct mlx5_flow_attr *attr)
{
	struct mlx5_flow_handle *rule;

	if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)
		return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);

	rule = mlx5e_tc_rule_offload(flow->priv, spec, attr);

	if (IS_ERR(rule))
		return rule;

	if (attr->esw_attr->split_count) {
		flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
		if (IS_ERR(flow->rule[1]))
			goto err_rule1;
	}

	return rule;

err_rule1:
	mlx5e_tc_rule_unoffload(flow->priv, rule, attr);
	return flow->rule[1];
}

void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
				  struct mlx5e_tc_flow *flow,
				  struct mlx5_flow_attr *attr)
{
	flow_flag_clear(flow, OFFLOADED);

	if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)
		return mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);

	if (attr->esw_attr->split_count)
		mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);

	mlx5e_tc_rule_unoffload(flow->priv, flow->rule[0], attr);
}

struct mlx5_flow_handle *
mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
			      struct mlx5e_tc_flow *flow,
			      struct mlx5_flow_spec *spec)
{
	struct mlx5_flow_attr *slow_attr;
	struct mlx5_flow_handle *rule;

	slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
	if (!slow_attr)
		return ERR_PTR(-ENOMEM);

	memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
	slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
	slow_attr->esw_attr->split_count = 0;
	slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH;

	rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
	if (!IS_ERR(rule))
		flow_flag_set(flow, SLOW);

	kfree(slow_attr);

	return rule;
}

void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
				       struct mlx5e_tc_flow *flow)
{
	struct mlx5_flow_attr *slow_attr;

	slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
	if (!slow_attr) {
		mlx5_core_warn(flow->priv->mdev, "Unable to alloc attr to unoffload slow path rule\n");
		return;
	}

	memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
	slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
	slow_attr->esw_attr->split_count = 0;
	slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH;
	mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
	flow_flag_clear(flow, SLOW);
	kfree(slow_attr);
}

/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
 * function.
 */
static void unready_flow_add(struct mlx5e_tc_flow *flow,
			     struct list_head *unready_flows)
{
	flow_flag_set(flow, NOT_READY);
	list_add_tail(&flow->unready, unready_flows);
}

/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
 * function.
 */
static void unready_flow_del(struct mlx5e_tc_flow *flow)
{
	list_del(&flow->unready);
	flow_flag_clear(flow, NOT_READY);
}

static void add_unready_flow(struct mlx5e_tc_flow *flow)
{
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *rpriv;
	struct mlx5_eswitch *esw;

	esw = flow->priv->mdev->priv.eswitch;
	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &rpriv->uplink_priv;

	mutex_lock(&uplink_priv->unready_flows_lock);
	unready_flow_add(flow, &uplink_priv->unready_flows);
	mutex_unlock(&uplink_priv->unready_flows_lock);
}

static void remove_unready_flow(struct mlx5e_tc_flow *flow)
{
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *rpriv;
	struct mlx5_eswitch *esw;

	esw = flow->priv->mdev->priv.eswitch;
	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &rpriv->uplink_priv;

	mutex_lock(&uplink_priv->unready_flows_lock);
	unready_flow_del(flow);
	mutex_unlock(&uplink_priv->unready_flows_lock);
}

bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev)
{
	struct mlx5_core_dev *out_mdev, *route_mdev;
	struct mlx5e_priv *out_priv, *route_priv;

	out_priv = netdev_priv(out_dev);
	out_mdev = out_priv->mdev;
	route_priv = netdev_priv(route_dev);
	route_mdev = route_priv->mdev;

	if (out_mdev->coredev_type != MLX5_COREDEV_PF ||
	    route_mdev->coredev_type != MLX5_COREDEV_VF)
		return false;

	return mlx5e_same_hw_devs(out_priv, route_priv);
}

int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport)
{
	struct mlx5e_priv *out_priv, *route_priv;
	struct mlx5_devcom *devcom = NULL;
	struct mlx5_core_dev *route_mdev;
	struct mlx5_eswitch *esw;
	u16 vhca_id;
	int err;

	out_priv = netdev_priv(out_dev);
	esw = out_priv->mdev->priv.eswitch;
	route_priv = netdev_priv(route_dev);
	route_mdev = route_priv->mdev;

	vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id);
	if (mlx5_lag_is_active(out_priv->mdev)) {
		/* In the LAG case we may get devices from different eswitch instances.
		 * If we fail to get the vport num, it most likely means we are on the
		 * wrong eswitch.
		 */
		err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
		if (err != -ENOENT)
			return err;

		devcom = out_priv->mdev->priv.devcom;
		esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
		if (!esw)
			return -ENODEV;
	}

	err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
	if (devcom)
		mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
	return err;
}

int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv,
			      struct mlx5e_tc_flow *flow,
			      struct mlx5_flow_attr *attr)
{
	struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
	struct mlx5_modify_hdr *mod_hdr;

	mod_hdr = mlx5_modify_header_alloc(priv->mdev,
					   mlx5e_get_flow_namespace(flow),
					   mod_hdr_acts->num_actions,
					   mod_hdr_acts->actions);
	if (IS_ERR(mod_hdr))
		return PTR_ERR(mod_hdr);

	WARN_ON(attr->modify_hdr);
	attr->modify_hdr = mod_hdr;

	return 0;
}
1493
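/* For every forward destination marked MLX5_ESW_DEST_ENCAP, attach the encap entry
 * for its mirred device and record the destination rep and mdev. *encap_valid is
 * cleared by mlx5e_attach_encap() when the encap is not yet valid (e.g. neighbour
 * unresolved), and *vf_tun is set when a destination tunnels through a VF; VF tunnel
 * encap combined with mirroring is rejected.
 */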
1494 static int
1495 set_encap_dests(struct mlx5e_priv *priv,
1496                 struct mlx5e_tc_flow *flow,
1497                 struct mlx5_flow_attr *attr,
1498                 struct netlink_ext_ack *extack,
1499                 bool *encap_valid,
1500                 bool *vf_tun)
1501 {
1502         struct mlx5e_tc_flow_parse_attr *parse_attr;
1503         struct mlx5_esw_flow_attr *esw_attr;
1504         struct net_device *encap_dev = NULL;
1505         struct mlx5e_rep_priv *rpriv;
1506         struct mlx5e_priv *out_priv;
1507         int out_index;
1508         int err = 0;
1509
1510         if (!mlx5e_is_eswitch_flow(flow))
1511                 return 0;
1512
1513         parse_attr = attr->parse_attr;
1514         esw_attr = attr->esw_attr;
1515         *vf_tun = false;
1516         *encap_valid = true;
1517
1518         for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
1519                 struct net_device *out_dev;
1520                 int mirred_ifindex;
1521
1522                 if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
1523                         continue;
1524
1525                 mirred_ifindex = parse_attr->mirred_ifindex[out_index];
1526                 out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex);
1527                 if (!out_dev) {
1528                         NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found");
1529                         err = -ENODEV;
1530                         goto out;
1531                 }
1532                 err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index,
1533                                          extack, &encap_dev, encap_valid);
1534                 dev_put(out_dev);
1535                 if (err)
1536                         goto out;
1537
1538                 if (esw_attr->dests[out_index].flags &
1539                     MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
1540                     !esw_attr->dest_int_port)
1541                         *vf_tun = true;
1542
1543                 out_priv = netdev_priv(encap_dev);
1544                 rpriv = out_priv->ppriv;
1545                 esw_attr->dests[out_index].rep = rpriv->rep;
1546                 esw_attr->dests[out_index].mdev = out_priv->mdev;
1547         }
1548
1549         if (*vf_tun && esw_attr->out_count > 1) {
1550                 NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported");
1551                 err = -EOPNOTSUPP;
1552                 goto out;
1553         }
1554
1555 out:
1556         return err;
1557 }
1558
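/* Undo set_encap_dests(): detach every encap destination and free its tunnel info.
 * *vf_tun is recomputed so the caller can tell whether the modify header was
 * allocated per flow (see mlx5e_tc_del_fdb_flow()).
 */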
1559 static void
1560 clean_encap_dests(struct mlx5e_priv *priv,
1561                   struct mlx5e_tc_flow *flow,
1562                   struct mlx5_flow_attr *attr,
1563                   bool *vf_tun)
1564 {
1565         struct mlx5_esw_flow_attr *esw_attr;
1566         int out_index;
1567
1568         if (!mlx5e_is_eswitch_flow(flow))
1569                 return;
1570
1571         esw_attr = attr->esw_attr;
1572         *vf_tun = false;
1573
1574         for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
1575                 if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
1576                         continue;
1577
1578                 if (esw_attr->dests[out_index].flags &
1579                     MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
1580                     !esw_attr->dest_int_port)
1581                         *vf_tun = true;
1582
1583                 mlx5e_detach_encap(priv, flow, attr, out_index);
1584                 kfree(attr->parse_attr->tun_info[out_index]);
1585         }
1586 }
1587
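/* Offload a flow into the FDB (eswitch) tables: validate the chain and prio against
 * the supported ranges, attach decap route / internal port / encap destinations as
 * needed, set up vlan actions, modify header and counter, and finally install the
 * rule either in the slow path table (encap neighbour not yet valid) or in the fdb
 * chain tables.
 */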
1588 static int
1589 mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
1590                       struct mlx5e_tc_flow *flow,
1591                       struct netlink_ext_ack *extack)
1592 {
1593         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1594         struct mlx5e_tc_flow_parse_attr *parse_attr;
1595         struct mlx5_flow_attr *attr = flow->attr;
1596         struct mlx5_esw_flow_attr *esw_attr;
1597         bool vf_tun, encap_valid;
1598         u32 max_prio, max_chain;
1599         int err = 0;
1600
1601         parse_attr = attr->parse_attr;
1602         esw_attr = attr->esw_attr;
1603
1604         /* We check chain range only for tc flows.
1605          * For ft flows, we checked attr->chain was originally 0 and set it to
1606          * FDB_FT_CHAIN which is outside tc range.
1607          * See mlx5e_rep_setup_ft_cb().
1608          */
1609         max_chain = mlx5_chains_get_chain_range(esw_chains(esw));
1610         if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) {
1611                 NL_SET_ERR_MSG_MOD(extack,
1612                                    "Requested chain is out of supported range");
1613                 err = -EOPNOTSUPP;
1614                 goto err_out;
1615         }
1616
1617         max_prio = mlx5_chains_get_prio_range(esw_chains(esw));
1618         if (attr->prio > max_prio) {
1619                 NL_SET_ERR_MSG_MOD(extack,
1620                                    "Requested priority is out of supported range");
1621                 err = -EOPNOTSUPP;
1622                 goto err_out;
1623         }
1624
1625         if (flow_flag_test(flow, TUN_RX)) {
1626                 err = mlx5e_attach_decap_route(priv, flow);
1627                 if (err)
1628                         goto err_out;
1629
1630                 if (!attr->chain && esw_attr->int_port &&
1631                     attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
1632                         /* If decap route device is internal port, change the
1633                          * source vport value in reg_c0 back to uplink just in
1634                          * case the rule performs goto chain > 0. If we have a miss
1635                          * on chain > 0 we want the metadata regs to hold the
1636                          * chain id so SW will resume handling of this packet
1637                          * from the proper chain.
1638                          */
1639                         u32 metadata = mlx5_eswitch_get_vport_metadata_for_set(esw,
1640                                                                         esw_attr->in_rep->vport);
1641
1642                         err = mlx5e_tc_match_to_reg_set(priv->mdev, &parse_attr->mod_hdr_acts,
1643                                                         MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG,
1644                                                         metadata);
1645                         if (err)
1646                                 goto err_out;
1647
1648                         attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1649                 }
1650         }
1651
1652         if (flow_flag_test(flow, L3_TO_L2_DECAP)) {
1653                 err = mlx5e_attach_decap(priv, flow, extack);
1654                 if (err)
1655                         goto err_out;
1656         }
1657
1658         if (netif_is_ovs_master(parse_attr->filter_dev)) {
1659                 struct mlx5e_tc_int_port *int_port;
1660
1661                 if (attr->chain) {
1662                         NL_SET_ERR_MSG_MOD(extack,
1663                                            "Internal port rule is only supported on chain 0");
1664                         err = -EOPNOTSUPP;
1665                         goto err_out;
1666                 }
1667
1668                 if (attr->dest_chain) {
1669                         NL_SET_ERR_MSG_MOD(extack,
1670                                            "Internal port rule offload doesn't support goto action");
1671                         err = -EOPNOTSUPP;
1672                         goto err_out;
1673                 }
1674
1675                 int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv),
1676                                                  parse_attr->filter_dev->ifindex,
1677                                                  flow_flag_test(flow, EGRESS) ?
1678                                                  MLX5E_TC_INT_PORT_EGRESS :
1679                                                  MLX5E_TC_INT_PORT_INGRESS);
1680                 if (IS_ERR(int_port)) {
1681                         err = PTR_ERR(int_port);
1682                         goto err_out;
1683                 }
1684
1685                 esw_attr->int_port = int_port;
1686         }
1687
1688         err = set_encap_dests(priv, flow, attr, extack, &encap_valid, &vf_tun);
1689         if (err)
1690                 goto err_out;
1691
1692         err = mlx5_eswitch_add_vlan_action(esw, attr);
1693         if (err)
1694                 goto err_out;
1695
1696         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
1697                 if (vf_tun) {
1698                         err = mlx5e_tc_add_flow_mod_hdr(priv, flow, attr);
1699                         if (err)
1700                                 goto err_out;
1701                 } else {
1702                         err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
1703                         if (err)
1704                                 goto err_out;
1705                 }
1706         }
1707
1708         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1709                 err = alloc_flow_attr_counter(esw_attr->counter_dev, attr);
1710                 if (err)
1711                         goto err_out;
1712         }
1713
1714         /* we get here if one of the following takes place:
1715          * (1) there's no error
1716          * (2) there's an encap action and we don't have a valid neigh
1717          */
1718         if (!encap_valid || flow_flag_test(flow, SLOW))
1719                 flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec);
1720         else
1721                 flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);
1722
1723         if (IS_ERR(flow->rule[0])) {
1724                 err = PTR_ERR(flow->rule[0]);
1725                 goto err_out;
1726         }
1727         flow_flag_set(flow, OFFLOADED);
1728
1729         return 0;
1730
1731 err_out:
1732         flow_flag_set(flow, FAILED);
1733         return err;
1734 }
1735
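/* A flow is considered to match on geneve TLV options when the first option data
 * dword is non-zero in the match value; such flows hold a geneve TLV option object
 * that must be released on delete.
 */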
1736 static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
1737 {
1738         struct mlx5_flow_spec *spec = &flow->attr->parse_attr->spec;
1739         void *headers_v = MLX5_ADDR_OF(fte_match_param,
1740                                        spec->match_value,
1741                                        misc_parameters_3);
1742         u32 geneve_tlv_opt_0_data = MLX5_GET(fte_match_set_misc3,
1743                                              headers_v,
1744                                              geneve_tlv_option_0_data);
1745
1746         return !!geneve_tlv_opt_0_data;
1747 }
1748
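/* Tear down an FDB flow, roughly in reverse order of mlx5e_tc_add_fdb_flow():
 * unoffload the rule (slow path or fdb), then release the geneve option, vlan
 * actions, decap route, encap destinations, CT match, modify header, counter and
 * internal ports before freeing the attributes.
 */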
1749 static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
1750                                   struct mlx5e_tc_flow *flow)
1751 {
1752         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1753         struct mlx5_flow_attr *attr = flow->attr;
1754         struct mlx5_esw_flow_attr *esw_attr;
1755         bool vf_tun;
1756
1757         esw_attr = attr->esw_attr;
1758         mlx5e_put_flow_tunnel_id(flow);
1759
1760         if (flow_flag_test(flow, NOT_READY))
1761                 remove_unready_flow(flow);
1762
1763         if (mlx5e_is_offloaded_flow(flow)) {
1764                 if (flow_flag_test(flow, SLOW))
1765                         mlx5e_tc_unoffload_from_slow_path(esw, flow);
1766                 else
1767                         mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
1768         }
1769         complete_all(&flow->del_hw_done);
1770
1771         if (mlx5_flow_has_geneve_opt(flow))
1772                 mlx5_geneve_tlv_option_del(priv->mdev->geneve);
1773
1774         mlx5_eswitch_del_vlan_action(esw, attr);
1775
1776         if (flow->decap_route)
1777                 mlx5e_detach_decap_route(priv, flow);
1778
1779         clean_encap_dests(priv, flow, attr, &vf_tun);
1780
1781         mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr);
1782
1783         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
1784                 mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts);
1785                 if (vf_tun && attr->modify_hdr)
1786                         mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr);
1787                 else
1788                         mlx5e_detach_mod_hdr(priv, flow);
1789         }
1790
1791         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
1792                 mlx5_fc_destroy(esw_attr->counter_dev, attr->counter);
1793
1794         if (esw_attr->int_port)
1795                 mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->int_port);
1796
1797         if (esw_attr->dest_int_port)
1798                 mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->dest_int_port);
1799
1800         if (flow_flag_test(flow, L3_TO_L2_DECAP))
1801                 mlx5e_detach_decap(priv, flow);
1802
1803         free_flow_post_acts(flow);
1804
1805         if (flow->attr->lag.count)
1806                 mlx5_lag_del_mpesw_rule(esw->dev);
1807
1808         kvfree(attr->esw_attr->rx_tun_attr);
1809         kvfree(attr->parse_attr);
1810         kfree(flow->attr);
1811 }
1812
1813 struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
1814 {
1815         struct mlx5_flow_attr *attr;
1816
1817         attr = list_first_entry(&flow->attrs, struct mlx5_flow_attr, list);
1818         return attr->counter;
1819 }
1820
1821 /* Iterate over tmp_list of flows attached to flow_list head. */
1822 void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list)
1823 {
1824         struct mlx5e_tc_flow *flow, *tmp;
1825
1826         list_for_each_entry_safe(flow, tmp, flow_list, tmp_list)
1827                 mlx5e_flow_put(priv, flow);
1828 }
1829
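/* Remove the duplicate of this flow that was offloaded on the peer eswitch (flows
 * marked DUP), dropping the last reference to the peer flow.
 */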
1830 static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
1831 {
1832         struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
1833
1834         if (!flow_flag_test(flow, ESWITCH) ||
1835             !flow_flag_test(flow, DUP))
1836                 return;
1837
1838         mutex_lock(&esw->offloads.peer_mutex);
1839         list_del(&flow->peer);
1840         mutex_unlock(&esw->offloads.peer_mutex);
1841
1842         flow_flag_clear(flow, DUP);
1843
1844         if (refcount_dec_and_test(&flow->peer_flow->refcnt)) {
1845                 mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
1846                 kfree(flow->peer_flow);
1847         }
1848
1849         flow->peer_flow = NULL;
1850 }
1851
1852 static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
1853 {
1854         struct mlx5_core_dev *dev = flow->priv->mdev;
1855         struct mlx5_devcom *devcom = dev->priv.devcom;
1856         struct mlx5_eswitch *peer_esw;
1857
1858         peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
1859         if (!peer_esw)
1860                 return;
1861
1862         __mlx5e_tc_del_fdb_peer_flow(flow);
1863         mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
1864 }
1865
1866 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
1867                               struct mlx5e_tc_flow *flow)
1868 {
1869         if (mlx5e_is_eswitch_flow(flow)) {
1870                 mlx5e_tc_del_fdb_peer_flow(flow);
1871                 mlx5e_tc_del_fdb_flow(priv, flow);
1872         } else {
1873                 mlx5e_tc_del_nic_flow(priv, flow);
1874         }
1875 }
1876
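/* A chain 0 flow on a tunnel device needs its tunnel match encoded into a mapping id
 * when it contains a goto or sample action (see mlx5e_get_flow_tunnel_id()), so the
 * tunnel info is not lost when processing continues in another table.
 */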
1877 static bool flow_requires_tunnel_mapping(u32 chain, struct flow_cls_offload *f)
1878 {
1879         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1880         struct flow_action *flow_action = &rule->action;
1881         const struct flow_action_entry *act;
1882         int i;
1883
1884         if (chain)
1885                 return false;
1886
1887         flow_action_for_each(i, act, flow_action) {
1888                 switch (act->id) {
1889                 case FLOW_ACTION_GOTO:
1890                 case FLOW_ACTION_SAMPLE:
1891                         return true;
1893                 default:
1894                         continue;
1895                 }
1896         }
1897
1898         return false;
1899 }
1900
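/* The enc_opts mask must either be all zeros (don't care) or describe a full match
 * on the geneve option class and type (0xffff/0xff); partial option matches cannot
 * be expressed by the tunnel mapping used for chains > 0.
 */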
1901 static int
1902 enc_opts_is_dont_care_or_full_match(struct mlx5e_priv *priv,
1903                                     struct flow_dissector_key_enc_opts *opts,
1904                                     struct netlink_ext_ack *extack,
1905                                     bool *dont_care)
1906 {
1907         struct geneve_opt *opt;
1908         int off = 0;
1909
1910         *dont_care = true;
1911
1912         while (opts->len > off) {
1913                 opt = (struct geneve_opt *)&opts->data[off];
1914
1915                 if (!(*dont_care) || opt->opt_class || opt->type ||
1916                     memchr_inv(opt->opt_data, 0, opt->length * 4)) {
1917                         *dont_care = false;
1918
1919                         if (opt->opt_class != htons(U16_MAX) ||
1920                             opt->type != U8_MAX) {
1921                                 NL_SET_ERR_MSG_MOD(extack,
1922                                                    "Partial match of tunnel options in chain > 0 isn't supported");
1923                                 netdev_warn(priv->netdev,
1924                                             "Partial match of tunnel options in chain > 0 isn't supported");
1925                                 return -EOPNOTSUPP;
1926                         }
1927                 }
1928
1929                 off += sizeof(struct geneve_opt) + opt->length * 4;
1930         }
1931
1932         return 0;
1933 }
1934
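/* Copy the flow rule's key for the given dissector into a field of the tunnel match
 * key built below.
 */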
1935 #define COPY_DISSECTOR(rule, diss_key, dst)\
1936 ({ \
1937         struct flow_rule *__rule = (rule);\
1938         typeof(dst) __dst = dst;\
1939 \
1940         memcpy(__dst,\
1941                skb_flow_dissector_target(__rule->match.dissector,\
1942                                          diss_key,\
1943                                          __rule->match.key),\
1944                sizeof(*__dst));\
1945 })
1946
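/* Encode the flow's tunnel match (outer addresses, ip, ports, key id and ifindex,
 * plus enc opts) into compact mapping ids. Chain 0 flows write the resulting value
 * into the TUNNEL_TO_REG register through a modify header action, while flows on
 * higher chains match on the register value instead.
 */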
1947 static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv,
1948                                     struct mlx5e_tc_flow *flow,
1949                                     struct flow_cls_offload *f,
1950                                     struct net_device *filter_dev)
1951 {
1952         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1953         struct netlink_ext_ack *extack = f->common.extack;
1954         struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
1955         struct flow_match_enc_opts enc_opts_match;
1956         struct tunnel_match_enc_opts tun_enc_opts;
1957         struct mlx5_rep_uplink_priv *uplink_priv;
1958         struct mlx5_flow_attr *attr = flow->attr;
1959         struct mlx5e_rep_priv *uplink_rpriv;
1960         struct tunnel_match_key tunnel_key;
1961         bool enc_opts_is_dont_care = true;
1962         u32 tun_id, enc_opts_id = 0;
1963         struct mlx5_eswitch *esw;
1964         u32 value, mask;
1965         int err;
1966
1967         esw = priv->mdev->priv.eswitch;
1968         uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1969         uplink_priv = &uplink_rpriv->uplink_priv;
1970
1971         memset(&tunnel_key, 0, sizeof(tunnel_key));
1972         COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL,
1973                        &tunnel_key.enc_control);
1974         if (tunnel_key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS)
1975                 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
1976                                &tunnel_key.enc_ipv4);
1977         else
1978                 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
1979                                &tunnel_key.enc_ipv6);
1980         COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IP, &tunnel_key.enc_ip);
1981         COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_PORTS,
1982                        &tunnel_key.enc_tp);
1983         COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_KEYID,
1984                        &tunnel_key.enc_key_id);
1985         tunnel_key.filter_ifindex = filter_dev->ifindex;
1986
1987         err = mapping_add(uplink_priv->tunnel_mapping, &tunnel_key, &tun_id);
1988         if (err)
1989                 return err;
1990
1991         flow_rule_match_enc_opts(rule, &enc_opts_match);
1992         err = enc_opts_is_dont_care_or_full_match(priv,
1993                                                   enc_opts_match.mask,
1994                                                   extack,
1995                                                   &enc_opts_is_dont_care);
1996         if (err)
1997                 goto err_enc_opts;
1998
1999         if (!enc_opts_is_dont_care) {
2000                 memset(&tun_enc_opts, 0, sizeof(tun_enc_opts));
2001                 memcpy(&tun_enc_opts.key, enc_opts_match.key,
2002                        sizeof(*enc_opts_match.key));
2003                 memcpy(&tun_enc_opts.mask, enc_opts_match.mask,
2004                        sizeof(*enc_opts_match.mask));
2005
2006                 err = mapping_add(uplink_priv->tunnel_enc_opts_mapping,
2007                                   &tun_enc_opts, &enc_opts_id);
2008                 if (err)
2009                         goto err_enc_opts;
2010         }
2011
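        /* Pack the tunnel mapping id in the high bits and the enc opts mapping id in
         * the low ENC_OPTS_BITS bits; the enc opts bits are wildcarded in the mask
         * when no options were matched.
         */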
2012         value = tun_id << ENC_OPTS_BITS | enc_opts_id;
2013         mask = enc_opts_id ? TUNNEL_ID_MASK :
2014                              (TUNNEL_ID_MASK & ~ENC_OPTS_BITS_MASK);
2015
2016         if (attr->chain) {
2017                 mlx5e_tc_match_to_reg_match(&attr->parse_attr->spec,
2018                                             TUNNEL_TO_REG, value, mask);
2019         } else {
2020                 mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
2021                 err = mlx5e_tc_match_to_reg_set(priv->mdev,
2022                                                 mod_hdr_acts, MLX5_FLOW_NAMESPACE_FDB,
2023                                                 TUNNEL_TO_REG, value);
2024                 if (err)
2025                         goto err_set;
2026
2027                 attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
2028         }
2029
2030         flow->attr->tunnel_id = value;
2031         return 0;
2032
2033 err_set:
2034         if (enc_opts_id)
2035                 mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
2036                                enc_opts_id);
2037 err_enc_opts:
2038         mapping_remove(uplink_priv->tunnel_mapping, tun_id);
2039         return err;
2040 }
2041
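/* Release the tunnel and enc opts mapping ids taken by mlx5e_get_flow_tunnel_id(). */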
2042 static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow)
2043 {
2044         u32 enc_opts_id = flow->attr->tunnel_id & ENC_OPTS_BITS_MASK;
2045         u32 tun_id = flow->attr->tunnel_id >> ENC_OPTS_BITS;
2046         struct mlx5_rep_uplink_priv *uplink_priv;
2047         struct mlx5e_rep_priv *uplink_rpriv;
2048         struct mlx5_eswitch *esw;
2049
2050         esw = flow->priv->mdev->priv.eswitch;
2051         uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
2052         uplink_priv = &uplink_rpriv->uplink_priv;
2053
2054         if (tun_id)
2055                 mapping_remove(uplink_priv->tunnel_mapping, tun_id);
2056         if (enc_opts_id)
2057                 mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
2058                                enc_opts_id);
2059 }
2060
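/* Prefer matching on ip_version over ethertype when the device supports the
 * ip_version field and the filter fully matches an IPv4/IPv6 ethertype; otherwise
 * fall back to an exact ethertype match.
 */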
2061 void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev,
2062                             struct flow_match_basic *match, bool outer,
2063                             void *headers_c, void *headers_v)
2064 {
2065         bool ip_version_cap;
2066
2067         ip_version_cap = outer ?
2068                 MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2069                                           ft_field_support.outer_ip_version) :
2070                 MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2071                                           ft_field_support.inner_ip_version);
2072
2073         if (ip_version_cap && match->mask->n_proto == htons(0xFFFF) &&
2074             (match->key->n_proto == htons(ETH_P_IP) ||
2075              match->key->n_proto == htons(ETH_P_IPV6))) {
2076                 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_version);
2077                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version,
2078                          match->key->n_proto == htons(ETH_P_IP) ? 4 : 6);
2079         } else {
2080                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
2081                          ntohs(match->mask->n_proto));
2082                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
2083                          ntohs(match->key->n_proto));
2084         }
2085 }
2086
2087 u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer)
2088 {
2089         void *headers_v;
2090         u16 ethertype;
2091         u8 ip_version;
2092
2093         if (outer)
2094                 headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
2095         else
2096                 headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, inner_headers);
2097
2098         ip_version = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_version);
2099         /* If ip_version wasn't matched on directly, derive it from the ethertype */
2100         if (!ip_version) {
2101                 ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
2102                 if (ethertype == ETH_P_IP || ethertype == ETH_P_ARP)
2103                         ip_version = 4;
2104                 else if (ethertype == ETH_P_IPV6)
2105                         ip_version = 6;
2106         }
2107         return ip_version;
2108 }
2109
2110 /* Tunnel device follows RFC 6040, see include/net/inet_ecn.h.
2111  * And changes inner ip_ecn depending on inner and outer ip_ecn as follows:
2112  *      +---------+----------------------------------------+
2113  *      |Arriving |         Arriving Outer Header          |
2114  *      |   Inner +---------+---------+---------+----------+
2115  *      |  Header | Not-ECT | ECT(0)  | ECT(1)  |   CE     |
2116  *      +---------+---------+---------+---------+----------+
2117  *      | Not-ECT | Not-ECT | Not-ECT | Not-ECT | <drop>   |
2118  *      |  ECT(0) |  ECT(0) | ECT(0)  | ECT(1)  |   CE*    |
2119  *      |  ECT(1) |  ECT(1) | ECT(1)  | ECT(1)* |   CE*    |
2120  *      |    CE   |   CE    |  CE     | CE      |   CE     |
2121  *      +---------+---------+---------+---------+----------+
2122  *
2123  * Tc matches on the inner header after decapsulation on the tunnel device, but hw offload
2124  * matches the inner ip_ecn value before the hardware decap action.
2125  *
2126  * Cells marked with '*' are changed from the original inner packet ip_ecn value during decap,
2127  * and so matching those values on inner ip_ecn before decap will fail.
2128  *
2129  * The following helper allows offload when inner ip_ecn won't be changed by outer ip_ecn,
2130  * except for outer ip_ecn = CE, where in all cases inner ip_ecn will be changed to CE,
2131  * and as such we can drop the inner ip_ecn=CE match.
2132  */
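/* Example: a filter matching inner tos ecn = ECT(0) would miss in hardware whenever
 * the outer header arrives with CE, since decap rewrites the inner ecn to CE before
 * the hardware match is evaluated.
 */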
2133
2134 static int mlx5e_tc_verify_tunnel_ecn(struct mlx5e_priv *priv,
2135                                       struct flow_cls_offload *f,
2136                                       bool *match_inner_ecn)
2137 {
2138         u8 outer_ecn_mask = 0, outer_ecn_key = 0, inner_ecn_mask = 0, inner_ecn_key = 0;
2139         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2140         struct netlink_ext_ack *extack = f->common.extack;
2141         struct flow_match_ip match;
2142
2143         *match_inner_ecn = true;
2144
2145         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
2146                 flow_rule_match_enc_ip(rule, &match);
2147                 outer_ecn_key = match.key->tos & INET_ECN_MASK;
2148                 outer_ecn_mask = match.mask->tos & INET_ECN_MASK;
2149         }
2150
2151         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
2152                 flow_rule_match_ip(rule, &match);
2153                 inner_ecn_key = match.key->tos & INET_ECN_MASK;
2154                 inner_ecn_mask = match.mask->tos & INET_ECN_MASK;
2155         }
2156
2157         if (outer_ecn_mask != 0 && outer_ecn_mask != INET_ECN_MASK) {
2158                 NL_SET_ERR_MSG_MOD(extack, "Partial match on enc_tos ecn bits isn't supported");
2159                 netdev_warn(priv->netdev, "Partial match on enc_tos ecn bits isn't supported");
2160                 return -EOPNOTSUPP;
2161         }
2162
2163         if (!outer_ecn_mask) {
2164                 if (!inner_ecn_mask)
2165                         return 0;
2166
2167                 NL_SET_ERR_MSG_MOD(extack,
2168                                    "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
2169                 netdev_warn(priv->netdev,
2170                             "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
2171                 return -EOPNOTSUPP;
2172         }
2173
2174         if (inner_ecn_mask && inner_ecn_mask != INET_ECN_MASK) {
2175                 NL_SET_ERR_MSG_MOD(extack,
2176                                    "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
2177                 netdev_warn(priv->netdev,
2178                             "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
2179                 return -EOPNOTSUPP;
2180         }
2181
2182         if (!inner_ecn_mask)
2183                 return 0;
2184
2185         /* Both inner and outer have full mask on ecn */
2186
2187         if (outer_ecn_key == INET_ECN_ECT_1) {
2188                 /* inner ecn might change by DECAP action */
2189
2190                 NL_SET_ERR_MSG_MOD(extack, "Match on enc_tos ecn = ECT(1) isn't supported");
2191                 netdev_warn(priv->netdev, "Match on enc_tos ecn = ECT(1) isn't supported");
2192                 return -EOPNOTSUPP;
2193         }
2194
2195         if (outer_ecn_key != INET_ECN_CE)
2196                 return 0;
2197
2198         if (inner_ecn_key != INET_ECN_CE) {
2199                 /* Can't happen in software, as packet ecn will be changed to CE after decap */
2200                 NL_SET_ERR_MSG_MOD(extack,
2201                                    "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
2202                 netdev_warn(priv->netdev,
2203                             "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
2204                 return -EOPNOTSUPP;
2205         }
2206
2207         /* outer ecn = CE, inner ecn = CE; as decap will change inner ecn to CE in any case,
2208          * drop the match on inner ecn
2209          */
2210         *match_inner_ecn = false;
2211
2212         return 0;
2213 }
2214
2215 static int parse_tunnel_attr(struct mlx5e_priv *priv,
2216                              struct mlx5e_tc_flow *flow,
2217                              struct mlx5_flow_spec *spec,
2218                              struct flow_cls_offload *f,
2219                              struct net_device *filter_dev,
2220                              u8 *match_level,
2221                              bool *match_inner)
2222 {
2223         struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev);
2224         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
2225         struct netlink_ext_ack *extack = f->common.extack;
2226         bool needs_mapping, sets_mapping;
2227         int err;
2228
2229         if (!mlx5e_is_eswitch_flow(flow)) {
2230                 NL_SET_ERR_MSG_MOD(extack, "Match on tunnel is not supported");
2231                 return -EOPNOTSUPP;
2232         }
2233
2234         needs_mapping = !!flow->attr->chain;
2235         sets_mapping = flow_requires_tunnel_mapping(flow->attr->chain, f);
2236         *match_inner = !needs_mapping;
2237
2238         if ((needs_mapping || sets_mapping) &&
2239             !mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
2240                 NL_SET_ERR_MSG_MOD(extack,
2241                                    "Chains on tunnel devices isn't supported without register loopback support");
2242                 netdev_warn(priv->netdev,
2243                             "Chains on tunnel devices isn't supported without register loopback support");
2244                 return -EOPNOTSUPP;
2245         }
2246
2247         if (!flow->attr->chain) {
2248                 err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
2249                                          match_level);
2250                 if (err) {
2251                         NL_SET_ERR_MSG_MOD(extack,
2252                                            "Failed to parse tunnel attributes");
2253                         netdev_warn(priv->netdev,
2254                                     "Failed to parse tunnel attributes");
2255                         return err;
2256                 }
2257
2258                 /* With mpls over udp we decapsulate using packet reformat
2259                  * object
2260                  */
2261                 if (!netif_is_bareudp(filter_dev))
2262                         flow->attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
2263                 err = mlx5e_tc_set_attr_rx_tun(flow, spec);
2264                 if (err)
2265                         return err;
2266         } else if (tunnel && tunnel->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) {
2267                 struct mlx5_flow_spec *tmp_spec;
2268
2269                 tmp_spec = kvzalloc(sizeof(*tmp_spec), GFP_KERNEL);
2270                 if (!tmp_spec) {
2271                         NL_SET_ERR_MSG_MOD(extack, "Failed to allocate memory for vxlan tmp spec");
2272                         netdev_warn(priv->netdev, "Failed to allocate memory for vxlan tmp spec");
2273                         return -ENOMEM;
2274                 }
2275                 memcpy(tmp_spec, spec, sizeof(*tmp_spec));
2276
2277                 err = mlx5e_tc_tun_parse(filter_dev, priv, tmp_spec, f, match_level);
2278                 if (err) {
2279                         kvfree(tmp_spec);
2280                         NL_SET_ERR_MSG_MOD(extack, "Failed to parse tunnel attributes");
2281                         netdev_warn(priv->netdev, "Failed to parse tunnel attributes");
2282                         return err;
2283                 }
2284                 err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec);
2285                 kvfree(tmp_spec);
2286                 if (err)
2287                         return err;
2288         }
2289
2290         if (!needs_mapping && !sets_mapping)
2291                 return 0;
2292
2293         return mlx5e_get_flow_tunnel_id(priv, flow, f, filter_dev);
2294 }
2295
2296 static void *get_match_inner_headers_criteria(struct mlx5_flow_spec *spec)
2297 {
2298         return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2299                             inner_headers);
2300 }
2301
2302 static void *get_match_inner_headers_value(struct mlx5_flow_spec *spec)
2303 {
2304         return MLX5_ADDR_OF(fte_match_param, spec->match_value,
2305                             inner_headers);
2306 }
2307
2308 static void *get_match_outer_headers_criteria(struct mlx5_flow_spec *spec)
2309 {
2310         return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2311                             outer_headers);
2312 }
2313
2314 static void *get_match_outer_headers_value(struct mlx5_flow_spec *spec)
2315 {
2316         return MLX5_ADDR_OF(fte_match_param, spec->match_value,
2317                             outer_headers);
2318 }
2319
2320 void *mlx5e_get_match_headers_value(u32 flags, struct mlx5_flow_spec *spec)
2321 {
2322         return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
2323                 get_match_inner_headers_value(spec) :
2324                 get_match_outer_headers_value(spec);
2325 }
2326
2327 void *mlx5e_get_match_headers_criteria(u32 flags, struct mlx5_flow_spec *spec)
2328 {
2329         return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
2330                 get_match_inner_headers_criteria(spec) :
2331                 get_match_outer_headers_criteria(spec);
2332 }
2333
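/* The only supported meta match is an exact ingress_ifindex that resolves to the
 * filter device itself; any other ingress port match is rejected.
 */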
2334 static int mlx5e_flower_parse_meta(struct net_device *filter_dev,
2335                                    struct flow_cls_offload *f)
2336 {
2337         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2338         struct netlink_ext_ack *extack = f->common.extack;
2339         struct net_device *ingress_dev;
2340         struct flow_match_meta match;
2341
2342         if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META))
2343                 return 0;
2344
2345         flow_rule_match_meta(rule, &match);
2346         if (!match.mask->ingress_ifindex)
2347                 return 0;
2348
2349         if (match.mask->ingress_ifindex != 0xFFFFFFFF) {
2350                 NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask");
2351                 return -EOPNOTSUPP;
2352         }
2353
2354         ingress_dev = __dev_get_by_index(dev_net(filter_dev),
2355                                          match.key->ingress_ifindex);
2356         if (!ingress_dev) {
2357                 NL_SET_ERR_MSG_MOD(extack,
2358                                    "Can't find the ingress port to match on");
2359                 return -ENOENT;
2360         }
2361
2362         if (ingress_dev != filter_dev) {
2363                 NL_SET_ERR_MSG_MOD(extack,
2364                                    "Can't match on the ingress filter port");
2365                 return -EOPNOTSUPP;
2366         }
2367
2368         return 0;
2369 }
2370
2371 static bool skip_key_basic(struct net_device *filter_dev,
2372                            struct flow_cls_offload *f)
2373 {
2374         /* When doing mpls over udp decap, the user needs to provide
2375          * MPLS_UC as the protocol in order to be able to match on mpls
2376          * label fields.  However, the actual ethertype is IP so we want to
2377          * avoid matching on this, otherwise we'll fail the match.
2378          */
2379         if (netif_is_bareudp(filter_dev) && f->common.chain_index == 0)
2380                 return true;
2381
2382         return false;
2383 }
2384
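/* Translate the flower match into an mlx5 flow spec: tunnel/outer headers, vlan and
 * cvlan tags, L2/L3/L4 fields. *outer_match_level and *inner_match_level record the
 * deepest header layer matched at each encapsulation level for the caller.
 */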
2385 static int __parse_cls_flower(struct mlx5e_priv *priv,
2386                               struct mlx5e_tc_flow *flow,
2387                               struct mlx5_flow_spec *spec,
2388                               struct flow_cls_offload *f,
2389                               struct net_device *filter_dev,
2390                               u8 *inner_match_level, u8 *outer_match_level)
2391 {
2392         struct netlink_ext_ack *extack = f->common.extack;
2393         void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2394                                        outer_headers);
2395         void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2396                                        outer_headers);
2397         void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2398                                     misc_parameters);
2399         void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2400                                     misc_parameters);
2401         void *misc_c_3 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2402                                     misc_parameters_3);
2403         void *misc_v_3 = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2404                                     misc_parameters_3);
2405         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2406         struct flow_dissector *dissector = rule->match.dissector;
2407         enum fs_flow_table_type fs_type;
2408         bool match_inner_ecn = true;
2409         u16 addr_type = 0;
2410         u8 ip_proto = 0;
2411         u8 *match_level;
2412         int err;
2413
2414         fs_type = mlx5e_is_eswitch_flow(flow) ? FS_FT_FDB : FS_FT_NIC_RX;
2415         match_level = outer_match_level;
2416
2417         if (dissector->used_keys &
2418             ~(BIT(FLOW_DISSECTOR_KEY_META) |
2419               BIT(FLOW_DISSECTOR_KEY_CONTROL) |
2420               BIT(FLOW_DISSECTOR_KEY_BASIC) |
2421               BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
2422               BIT(FLOW_DISSECTOR_KEY_VLAN) |
2423               BIT(FLOW_DISSECTOR_KEY_CVLAN) |
2424               BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
2425               BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
2426               BIT(FLOW_DISSECTOR_KEY_PORTS) |
2427               BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
2428               BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
2429               BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
2430               BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) |
2431               BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
2432               BIT(FLOW_DISSECTOR_KEY_TCP) |
2433               BIT(FLOW_DISSECTOR_KEY_IP)  |
2434               BIT(FLOW_DISSECTOR_KEY_CT) |
2435               BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
2436               BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) |
2437               BIT(FLOW_DISSECTOR_KEY_ICMP) |
2438               BIT(FLOW_DISSECTOR_KEY_MPLS))) {
2439                 NL_SET_ERR_MSG_MOD(extack, "Unsupported key");
2440                 netdev_dbg(priv->netdev, "Unsupported key used: 0x%x\n",
2441                            dissector->used_keys);
2442                 return -EOPNOTSUPP;
2443         }
2444
2445         if (mlx5e_get_tc_tun(filter_dev)) {
2446                 bool match_inner = false;
2447
2448                 err = parse_tunnel_attr(priv, flow, spec, f, filter_dev,
2449                                         outer_match_level, &match_inner);
2450                 if (err)
2451                         return err;
2452
2453                 if (match_inner) {
2454                         /* header pointers should point to the inner headers
2455                          * if the packet was decapsulated already.
2456                          * outer headers are set by parse_tunnel_attr.
2457                          */
2458                         match_level = inner_match_level;
2459                         headers_c = get_match_inner_headers_criteria(spec);
2460                         headers_v = get_match_inner_headers_value(spec);
2461                 }
2462
2463                 err = mlx5e_tc_verify_tunnel_ecn(priv, f, &match_inner_ecn);
2464                 if (err)
2465                         return err;
2466         }
2467
2468         err = mlx5e_flower_parse_meta(filter_dev, f);
2469         if (err)
2470                 return err;
2471
2472         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC) &&
2473             !skip_key_basic(filter_dev, f)) {
2474                 struct flow_match_basic match;
2475
2476                 flow_rule_match_basic(rule, &match);
2477                 mlx5e_tc_set_ethertype(priv->mdev, &match,
2478                                        match_level == outer_match_level,
2479                                        headers_c, headers_v);
2480
2481                 if (match.mask->n_proto)
2482                         *match_level = MLX5_MATCH_L2;
2483         }
2484         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN) ||
2485             is_vlan_dev(filter_dev)) {
2486                 struct flow_dissector_key_vlan filter_dev_mask;
2487                 struct flow_dissector_key_vlan filter_dev_key;
2488                 struct flow_match_vlan match;
2489
2490                 if (is_vlan_dev(filter_dev)) {
2491                         match.key = &filter_dev_key;
2492                         match.key->vlan_id = vlan_dev_vlan_id(filter_dev);
2493                         match.key->vlan_tpid = vlan_dev_vlan_proto(filter_dev);
2494                         match.key->vlan_priority = 0;
2495                         match.mask = &filter_dev_mask;
2496                         memset(match.mask, 0xff, sizeof(*match.mask));
2497                         match.mask->vlan_priority = 0;
2498                 } else {
2499                         flow_rule_match_vlan(rule, &match);
2500                 }
2501                 if (match.mask->vlan_id ||
2502                     match.mask->vlan_priority ||
2503                     match.mask->vlan_tpid) {
2504                         if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
2505                                 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2506                                          svlan_tag, 1);
2507                                 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2508                                          svlan_tag, 1);
2509                         } else {
2510                                 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2511                                          cvlan_tag, 1);
2512                                 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2513                                          cvlan_tag, 1);
2514                         }
2515
2516                         MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid,
2517                                  match.mask->vlan_id);
2518                         MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid,
2519                                  match.key->vlan_id);
2520
2521                         MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio,
2522                                  match.mask->vlan_priority);
2523                         MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio,
2524                                  match.key->vlan_priority);
2525
2526                         *match_level = MLX5_MATCH_L2;
2527
2528                         if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN) &&
2529                             match.mask->vlan_eth_type &&
2530                             MLX5_CAP_FLOWTABLE_TYPE(priv->mdev,
2531                                                     ft_field_support.outer_second_vid,
2532                                                     fs_type)) {
2533                                 MLX5_SET(fte_match_set_misc, misc_c,
2534                                          outer_second_cvlan_tag, 1);
2535                                 spec->match_criteria_enable |=
2536                                         MLX5_MATCH_MISC_PARAMETERS;
2537                         }
2538                 }
2539         } else if (*match_level != MLX5_MATCH_NONE) {
2540                 /* cvlan_tag enabled in the match criteria and
2541                  * disabled in the match value means both S & C tags
2542                  * don't exist (untagged on both)
2543                  */
2544                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
2545                 *match_level = MLX5_MATCH_L2;
2546         }
2547
2548         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
2549                 struct flow_match_vlan match;
2550
2551                 flow_rule_match_cvlan(rule, &match);
2552                 if (match.mask->vlan_id ||
2553                     match.mask->vlan_priority ||
2554                     match.mask->vlan_tpid) {
2555                         if (!MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, ft_field_support.outer_second_vid,
2556                                                      fs_type)) {
2557                                 NL_SET_ERR_MSG_MOD(extack,
2558                                                    "Matching on CVLAN is not supported");
2559                                 return -EOPNOTSUPP;
2560                         }
2561
2562                         if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
2563                                 MLX5_SET(fte_match_set_misc, misc_c,
2564                                          outer_second_svlan_tag, 1);
2565                                 MLX5_SET(fte_match_set_misc, misc_v,
2566                                          outer_second_svlan_tag, 1);
2567                         } else {
2568                                 MLX5_SET(fte_match_set_misc, misc_c,
2569                                          outer_second_cvlan_tag, 1);
2570                                 MLX5_SET(fte_match_set_misc, misc_v,
2571                                          outer_second_cvlan_tag, 1);
2572                         }
2573
2574                         MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid,
2575                                  match.mask->vlan_id);
2576                         MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid,
2577                                  match.key->vlan_id);
2578                         MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio,
2579                                  match.mask->vlan_priority);
2580                         MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio,
2581                                  match.key->vlan_priority);
2582
2583                         *match_level = MLX5_MATCH_L2;
2584                         spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
2585                 }
2586         }
2587
2588         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
2589                 struct flow_match_eth_addrs match;
2590
2591                 flow_rule_match_eth_addrs(rule, &match);
2592                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2593                                              dmac_47_16),
2594                                 match.mask->dst);
2595                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2596                                              dmac_47_16),
2597                                 match.key->dst);
2598
2599                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2600                                              smac_47_16),
2601                                 match.mask->src);
2602                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2603                                              smac_47_16),
2604                                 match.key->src);
2605
2606                 if (!is_zero_ether_addr(match.mask->src) ||
2607                     !is_zero_ether_addr(match.mask->dst))
2608                         *match_level = MLX5_MATCH_L2;
2609         }
2610
2611         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
2612                 struct flow_match_control match;
2613
2614                 flow_rule_match_control(rule, &match);
2615                 addr_type = match.key->addr_type;
2616
2617                 /* the HW doesn't support frag first/later */
2618                 if (match.mask->flags & FLOW_DIS_FIRST_FRAG) {
2619                         NL_SET_ERR_MSG_MOD(extack, "Match on frag first/later is not supported");
2620                         return -EOPNOTSUPP;
2621                 }
2622
2623                 if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) {
2624                         MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
2625                         MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
2626                                  match.key->flags & FLOW_DIS_IS_FRAGMENT);
2627
2628                         /* the HW doesn't need L3 inline to match on frag=no */
2629                         if (!(match.key->flags & FLOW_DIS_IS_FRAGMENT))
2630                                 *match_level = MLX5_MATCH_L2;
2631                         else
2632                                 *match_level = MLX5_MATCH_L3;
2633                 }
2634         }
2635         /* ***  L2 attributes parsing up to here *** */
2636
2637         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
2638                 struct flow_match_basic match;
2639
2640                 flow_rule_match_basic(rule, &match);
2641                 ip_proto = match.key->ip_proto;
2642
2643                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
2644                          match.mask->ip_proto);
2645                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
2646                          match.key->ip_proto);
2647
2648                 if (match.mask->ip_proto)
2649                         *match_level = MLX5_MATCH_L3;
2650         }
2651
2652         if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
2653                 struct flow_match_ipv4_addrs match;
2654
2655                 flow_rule_match_ipv4_addrs(rule, &match);
2656                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2657                                     src_ipv4_src_ipv6.ipv4_layout.ipv4),
2658                        &match.mask->src, sizeof(match.mask->src));
2659                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2660                                     src_ipv4_src_ipv6.ipv4_layout.ipv4),
2661                        &match.key->src, sizeof(match.key->src));
2662                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2663                                     dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2664                        &match.mask->dst, sizeof(match.mask->dst));
2665                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2666                                     dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2667                        &match.key->dst, sizeof(match.key->dst));
2668
2669                 if (match.mask->src || match.mask->dst)
2670                         *match_level = MLX5_MATCH_L3;
2671         }
2672
2673         if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
2674                 struct flow_match_ipv6_addrs match;
2675
2676                 flow_rule_match_ipv6_addrs(rule, &match);
2677                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2678                                     src_ipv4_src_ipv6.ipv6_layout.ipv6),
2679                        &match.mask->src, sizeof(match.mask->src));
2680                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2681                                     src_ipv4_src_ipv6.ipv6_layout.ipv6),
2682                        &match.key->src, sizeof(match.key->src));
2683
2684                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2685                                     dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2686                        &match.mask->dst, sizeof(match.mask->dst));
2687                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2688                                     dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2689                        &match.key->dst, sizeof(match.key->dst));
2690
2691                 if (ipv6_addr_type(&match.mask->src) != IPV6_ADDR_ANY ||
2692                     ipv6_addr_type(&match.mask->dst) != IPV6_ADDR_ANY)
2693                         *match_level = MLX5_MATCH_L3;
2694         }
2695
2696         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
2697                 struct flow_match_ip match;
2698
2699                 flow_rule_match_ip(rule, &match);
2700                 if (match_inner_ecn) {
2701                         MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
2702                                  match.mask->tos & 0x3);
2703                         MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
2704                                  match.key->tos & 0x3);
2705                 }
2706
2707                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
2708                          match.mask->tos >> 2);
2709                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
2710                          match.key->tos  >> 2);
2711
2712                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
2713                          match.mask->ttl);
2714                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
2715                          match.key->ttl);
2716
2717                 if (match.mask->ttl &&
2718                     !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
2719                                                 ft_field_support.outer_ipv4_ttl)) {
2720                         NL_SET_ERR_MSG_MOD(extack,
2721                                            "Matching on TTL is not supported");
2722                         return -EOPNOTSUPP;
2723                 }
2724
2725                 if (match.mask->tos || match.mask->ttl)
2726                         *match_level = MLX5_MATCH_L3;
2727         }
2728
2729         /* ***  L3 attributes parsing up to here *** */
2730
2731         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
2732                 struct flow_match_ports match;
2733
2734                 flow_rule_match_ports(rule, &match);
2735                 switch (ip_proto) {
2736                 case IPPROTO_TCP:
2737                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2738                                  tcp_sport, ntohs(match.mask->src));
2739                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2740                                  tcp_sport, ntohs(match.key->src));
2741
2742                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2743                                  tcp_dport, ntohs(match.mask->dst));
2744                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2745                                  tcp_dport, ntohs(match.key->dst));
2746                         break;
2747
2748                 case IPPROTO_UDP:
2749                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2750                                  udp_sport, ntohs(match.mask->src));
2751                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2752                                  udp_sport, ntohs(match.key->src));
2753
2754                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2755                                  udp_dport, ntohs(match.mask->dst));
2756                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2757                                  udp_dport, ntohs(match.key->dst));
2758                         break;
2759                 default:
2760                         NL_SET_ERR_MSG_MOD(extack,
2761                                            "Only UDP and TCP transports are supported for L4 matching");
2762                         netdev_err(priv->netdev,
2763                                    "Only UDP and TCP transport are supported\n");
2764                         return -EINVAL;
2765                 }
2766
2767                 if (match.mask->src || match.mask->dst)
2768                         *match_level = MLX5_MATCH_L4;
2769         }
2770
2771         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
2772                 struct flow_match_tcp match;
2773
2774                 flow_rule_match_tcp(rule, &match);
2775                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
2776                          ntohs(match.mask->flags));
2777                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
2778                          ntohs(match.key->flags));
2779
2780                 if (match.mask->flags)
2781                         *match_level = MLX5_MATCH_L4;
2782         }
2783         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP)) {
2784                 struct flow_match_icmp match;
2785
2786                 flow_rule_match_icmp(rule, &match);
2787                 switch (ip_proto) {
2788                 case IPPROTO_ICMP:
2789                         if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) &
2790                               MLX5_FLEX_PROTO_ICMP)) {
2791                                 NL_SET_ERR_MSG_MOD(extack,
2792                                                    "Match on Flex protocols for ICMP is not supported");
2793                                 return -EOPNOTSUPP;
2794                         }
2795                         MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_type,
2796                                  match.mask->type);
2797                         MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_type,
2798                                  match.key->type);
2799                         MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_code,
2800                                  match.mask->code);
2801                         MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_code,
2802                                  match.key->code);
2803                         break;
2804                 case IPPROTO_ICMPV6:
2805                         if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) &
2806                               MLX5_FLEX_PROTO_ICMPV6)) {
2807                                 NL_SET_ERR_MSG_MOD(extack,
2808                                                    "Match on Flex protocols for ICMPV6 is not supported");
2809                                 return -EOPNOTSUPP;
2810                         }
2811                         MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_type,
2812                                  match.mask->type);
2813                         MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_type,
2814                                  match.key->type);
2815                         MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_code,
2816                                  match.mask->code);
2817                         MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_code,
2818                                  match.key->code);
2819                         break;
2820                 default:
2821                         NL_SET_ERR_MSG_MOD(extack,
2822                                            "Code and type matching only with ICMP and ICMPv6");
2823                         netdev_err(priv->netdev,
2824                                    "Code and type matching only with ICMP and ICMPv6\n");
2825                         return -EINVAL;
2826                 }
2827                 if (match.mask->code || match.mask->type) {
2828                         *match_level = MLX5_MATCH_L4;
2829                         spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3;
2830                 }
2831         }
2832         /* Currently supported only for MPLS over UDP */
2833         if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS) &&
2834             !netif_is_bareudp(filter_dev)) {
2835                 NL_SET_ERR_MSG_MOD(extack,
2836                                    "Matching on MPLS is supported only for MPLS over UDP");
2837                 netdev_err(priv->netdev,
2838                            "Matching on MPLS is supported only for MPLS over UDP\n");
2839                 return -EOPNOTSUPP;
2840         }
2841
2842         return 0;
2843 }
2844
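/* Top-level flower classifier parser: builds the flow spec via
 * __parse_cls_flower() and, for eswitch flows on non-uplink vports, verifies
 * that the configured min-inline mode covers the parsed match level.
 */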
2845 static int parse_cls_flower(struct mlx5e_priv *priv,
2846                             struct mlx5e_tc_flow *flow,
2847                             struct mlx5_flow_spec *spec,
2848                             struct flow_cls_offload *f,
2849                             struct net_device *filter_dev)
2850 {
2851         u8 inner_match_level, outer_match_level, non_tunnel_match_level;
2852         struct netlink_ext_ack *extack = f->common.extack;
2853         struct mlx5_core_dev *dev = priv->mdev;
2854         struct mlx5_eswitch *esw = dev->priv.eswitch;
2855         struct mlx5e_rep_priv *rpriv = priv->ppriv;
2856         struct mlx5_eswitch_rep *rep;
2857         bool is_eswitch_flow;
2858         int err;
2859
2860         inner_match_level = MLX5_MATCH_NONE;
2861         outer_match_level = MLX5_MATCH_NONE;
2862
2863         err = __parse_cls_flower(priv, flow, spec, f, filter_dev,
2864                                  &inner_match_level, &outer_match_level);
2865         non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ?
2866                                  outer_match_level : inner_match_level;
2867
2868         is_eswitch_flow = mlx5e_is_eswitch_flow(flow);
2869         if (!err && is_eswitch_flow) {
2870                 rep = rpriv->rep;
2871                 if (rep->vport != MLX5_VPORT_UPLINK &&
2872                     (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
2873                     esw->offloads.inline_mode < non_tunnel_match_level)) {
2874                         NL_SET_ERR_MSG_MOD(extack,
2875                                            "Flow is not offloaded due to min inline setting");
2876                         netdev_warn(priv->netdev,
2877                                     "Flow is not offloaded due to min inline setting, required %d actual %d\n",
2878                                     non_tunnel_match_level, esw->offloads.inline_mode);
2879                         return -EOPNOTSUPP;
2880                 }
2881         }
2882
2883         flow->attr->inner_match_level = inner_match_level;
2884         flow->attr->outer_match_level = outer_match_level;
2885
2887         return err;
2888 }
2889
2890 struct mlx5_fields {
2891         u8  field;
2892         u8  field_bsize;
2893         u32 field_mask;
2894         u32 offset;
2895         u32 match_offset;
2896 };
2897
2898 #define OFFLOAD(fw_field, field_bsize, field_mask, field, off, match_field) \
2899                 {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, field_bsize, field_mask, \
2900                  offsetof(struct pedit_headers, field) + (off), \
2901                  MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)}
2902
2903 /* Check whether a rewrite is redundant w.r.t. the match: the masked values
2904  * are equal and no rewritten bit lacks a corresponding match bit.
2905  */
2906 #define SAME_VAL_MASK(type, valp, maskp, matchvalp, matchmaskp) ({ \
2907         type matchmaskx = *(type *)(matchmaskp); \
2908         type matchvalx = *(type *)(matchvalp); \
2909         type maskx = *(type *)(maskp); \
2910         type valx = *(type *)(valp); \
2911         \
2912         (valx & maskx) == (matchvalx & matchmaskx) && !(maskx & (maskx ^ \
2913                                                                  matchmaskx)); \
2914 })
2915
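/* cmp_val_mask() dispatches SAME_VAL_MASK() by field bit width. As an
 * illustration (u8 values): rewriting value 0x0f with mask 0x0f while the
 * rule already matches value 0x0f with mask 0xff is redundant, so the pedit
 * can be skipped; rewriting with mask 0xff against a match mask of only 0x0f
 * is not, since bits outside the matched range would change.
 */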
2916 static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp,
2917                          void *matchmaskp, u8 bsize)
2918 {
2919         bool same = false;
2920
2921         switch (bsize) {
2922         case 8:
2923                 same = SAME_VAL_MASK(u8, valp, maskp, matchvalp, matchmaskp);
2924                 break;
2925         case 16:
2926                 same = SAME_VAL_MASK(u16, valp, maskp, matchvalp, matchmaskp);
2927                 break;
2928         case 32:
2929                 same = SAME_VAL_MASK(u32, valp, maskp, matchvalp, matchmaskp);
2930                 break;
2931         }
2932
2933         return same;
2934 }
2935
2936 static struct mlx5_fields fields[] = {
2937         OFFLOAD(DMAC_47_16, 32, U32_MAX, eth.h_dest[0], 0, dmac_47_16),
2938         OFFLOAD(DMAC_15_0,  16, U16_MAX, eth.h_dest[4], 0, dmac_15_0),
2939         OFFLOAD(SMAC_47_16, 32, U32_MAX, eth.h_source[0], 0, smac_47_16),
2940         OFFLOAD(SMAC_15_0,  16, U16_MAX, eth.h_source[4], 0, smac_15_0),
2941         OFFLOAD(ETHERTYPE,  16, U16_MAX, eth.h_proto, 0, ethertype),
2942         OFFLOAD(FIRST_VID,  16, U16_MAX, vlan.h_vlan_TCI, 0, first_vid),
2943
2944         OFFLOAD(IP_DSCP, 8,    0xfc, ip4.tos,   0, ip_dscp),
2945         OFFLOAD(IP_TTL,  8,  U8_MAX, ip4.ttl,   0, ttl_hoplimit),
2946         OFFLOAD(SIPV4,  32, U32_MAX, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4),
2947         OFFLOAD(DIPV4,  32, U32_MAX, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2948
2949         OFFLOAD(SIPV6_127_96, 32, U32_MAX, ip6.saddr.s6_addr32[0], 0,
2950                 src_ipv4_src_ipv6.ipv6_layout.ipv6[0]),
2951         OFFLOAD(SIPV6_95_64,  32, U32_MAX, ip6.saddr.s6_addr32[1], 0,
2952                 src_ipv4_src_ipv6.ipv6_layout.ipv6[4]),
2953         OFFLOAD(SIPV6_63_32,  32, U32_MAX, ip6.saddr.s6_addr32[2], 0,
2954                 src_ipv4_src_ipv6.ipv6_layout.ipv6[8]),
2955         OFFLOAD(SIPV6_31_0,   32, U32_MAX, ip6.saddr.s6_addr32[3], 0,
2956                 src_ipv4_src_ipv6.ipv6_layout.ipv6[12]),
2957         OFFLOAD(DIPV6_127_96, 32, U32_MAX, ip6.daddr.s6_addr32[0], 0,
2958                 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[0]),
2959         OFFLOAD(DIPV6_95_64,  32, U32_MAX, ip6.daddr.s6_addr32[1], 0,
2960                 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[4]),
2961         OFFLOAD(DIPV6_63_32,  32, U32_MAX, ip6.daddr.s6_addr32[2], 0,
2962                 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[8]),
2963         OFFLOAD(DIPV6_31_0,   32, U32_MAX, ip6.daddr.s6_addr32[3], 0,
2964                 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]),
2965         OFFLOAD(IPV6_HOPLIMIT, 8,  U8_MAX, ip6.hop_limit, 0, ttl_hoplimit),
2966         OFFLOAD(IP_DSCP, 16,  0xc00f, ip6, 0, ip_dscp),
2967
2968         OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source,  0, tcp_sport),
2969         OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest,    0, tcp_dport),
2970         /* in struct tcphdr the flags byte is 8 bits, 5 bytes after ack_seq */
2971         OFFLOAD(TCP_FLAGS,  8,  U8_MAX, tcp.ack_seq, 5, tcp_flags),
2972
2973         OFFLOAD(UDP_SPORT, 16, U16_MAX, udp.source, 0, udp_sport),
2974         OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest,   0, udp_dport),
2975 };
2976
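/* Convert a mask given in network (big endian) byte order to little-endian
 * layout, so the find_*_bit() scans below compute correct bit offsets and
 * lengths for the HW set_action_in fields.
 */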
2977 static unsigned long mask_to_le(unsigned long mask, int size)
2978 {
2979         __be32 mask_be32;
2980         __be16 mask_be16;
2981
2982         if (size == 32) {
2983                 mask_be32 = (__force __be32)(mask);
2984                 mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
2985         } else if (size == 16) {
2986                 mask_be32 = (__force __be32)(mask);
2987                 mask_be16 = *(__be16 *)&mask_be32;
2988                 mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
2989         }
2990
2991         return mask;
2992 }
2993
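/* Translate the accumulated pedit set/add masks into mod_hdr actions, walking
 * the fields[] table. Rewrites that only restate an existing exact match and
 * additions of zero are skipped; rewrites of non-contiguous sub-fields are
 * rejected.
 */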
2994 static int offload_pedit_fields(struct mlx5e_priv *priv,
2995                                 int namespace,
2996                                 struct mlx5e_tc_flow_parse_attr *parse_attr,
2997                                 u32 *action_flags,
2998                                 struct netlink_ext_ack *extack)
2999 {
3000         struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
3001         struct pedit_headers_action *hdrs = parse_attr->hdrs;
3002         void *headers_c, *headers_v, *action, *vals_p;
3003         u32 *s_masks_p, *a_masks_p, s_mask, a_mask;
3004         struct mlx5e_tc_mod_hdr_acts *mod_acts;
3005         unsigned long mask, field_mask;
3006         int i, first, last, next_z;
3007         struct mlx5_fields *f;
3008         u8 cmd;
3009
3010         mod_acts = &parse_attr->mod_hdr_acts;
3011         headers_c = mlx5e_get_match_headers_criteria(*action_flags, &parse_attr->spec);
3012         headers_v = mlx5e_get_match_headers_value(*action_flags, &parse_attr->spec);
3013
3014         set_masks = &hdrs[0].masks;
3015         add_masks = &hdrs[1].masks;
3016         set_vals = &hdrs[0].vals;
3017         add_vals = &hdrs[1].vals;
3018
3019         for (i = 0; i < ARRAY_SIZE(fields); i++) {
3020                 bool skip;
3021
3022                 f = &fields[i];
3023                 /* avoid seeing bits set from previous iterations */
3024                 s_mask = 0;
3025                 a_mask = 0;
3026
3027                 s_masks_p = (void *)set_masks + f->offset;
3028                 a_masks_p = (void *)add_masks + f->offset;
3029
3030                 s_mask = *s_masks_p & f->field_mask;
3031                 a_mask = *a_masks_p & f->field_mask;
3032
3033                 if (!s_mask && !a_mask) /* nothing to offload here */
3034                         continue;
3035
3036                 if (s_mask && a_mask) {
3037                         NL_SET_ERR_MSG_MOD(extack,
3038                                            "can't set and add to the same HW field");
3039                         netdev_warn(priv->netdev,
3040                                     "mlx5: can't set and add to the same HW field (%x)\n",
3041                                     f->field);
3042                         return -EOPNOTSUPP;
3043                 }
3044
3045                 skip = false;
3046                 if (s_mask) {
3047                         void *match_mask = headers_c + f->match_offset;
3048                         void *match_val = headers_v + f->match_offset;
3049
3050                         cmd  = MLX5_ACTION_TYPE_SET;
3051                         mask = s_mask;
3052                         vals_p = (void *)set_vals + f->offset;
3053                         /* don't rewrite if we have a match on the same value */
3054                         if (cmp_val_mask(vals_p, s_masks_p, match_val,
3055                                          match_mask, f->field_bsize))
3056                                 skip = true;
3057                         /* clear to denote we consumed this field */
3058                         *s_masks_p &= ~f->field_mask;
3059                 } else {
3060                         cmd  = MLX5_ACTION_TYPE_ADD;
3061                         mask = a_mask;
3062                         vals_p = (void *)add_vals + f->offset;
3063                         /* add 0 is no change */
3064                         if ((*(u32 *)vals_p & f->field_mask) == 0)
3065                                 skip = true;
3066                         /* clear to denote we consumed this field */
3067                         *a_masks_p &= ~f->field_mask;
3068                 }
3069                 if (skip)
3070                         continue;
3071
3072                 mask = mask_to_le(mask, f->field_bsize);
3073
3074                 first = find_first_bit(&mask, f->field_bsize);
3075                 next_z = find_next_zero_bit(&mask, f->field_bsize, first);
3076                 last  = find_last_bit(&mask, f->field_bsize);
3077                 if (first < next_z && next_z < last) {
3078                         NL_SET_ERR_MSG_MOD(extack,
3079                                            "rewrite of few sub-fields isn't supported");
3080                         netdev_warn(priv->netdev,
3081                                     "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n",
3082                                     mask);
3083                         return -EOPNOTSUPP;
3084                 }
3085
3086                 action = mlx5e_mod_hdr_alloc(priv->mdev, namespace, mod_acts);
3087                 if (IS_ERR(action)) {
3088                         NL_SET_ERR_MSG_MOD(extack,
3089                                            "too many pedit actions, can't offload");
3090                         mlx5_core_warn(priv->mdev,
3091                                        "mlx5: parsed %d pedit actions, can't do more\n",
3092                                        mod_acts->num_actions);
3093                         return PTR_ERR(action);
3094                 }
3095
3096                 MLX5_SET(set_action_in, action, action_type, cmd);
3097                 MLX5_SET(set_action_in, action, field, f->field);
3098
3099                 if (cmd == MLX5_ACTION_TYPE_SET) {
3100                         int start;
3101
3102                         field_mask = mask_to_le(f->field_mask, f->field_bsize);
3103
3104                         /* a sub-byte field may not start at bit 0 of the HW field */
3105                         start = find_first_bit(&field_mask, f->field_bsize);
3106
3107                         MLX5_SET(set_action_in, action, offset, first - start);
3108                         /* length is num of bits to be written, zero means length of 32 */
3109                         MLX5_SET(set_action_in, action, length, (last - first + 1));
3110                 }
3111
3112                 if (f->field_bsize == 32)
3113                         MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first);
3114                 else if (f->field_bsize == 16)
3115                         MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first);
3116                 else if (f->field_bsize == 8)
3117                         MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first);
3118
3119                 ++mod_acts->num_actions;
3120         }
3121
3122         return 0;
3123 }
3124
3125 static const struct pedit_headers zero_masks = {};
3126
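/* After offload_pedit_fields() has consumed every supported field, any mask
 * bits still left in the per-command pedit headers denote fields that cannot
 * be offloaded.
 */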
3127 static int verify_offload_pedit_fields(struct mlx5e_priv *priv,
3128                                        struct mlx5e_tc_flow_parse_attr *parse_attr,
3129                                        struct netlink_ext_ack *extack)
3130 {
3131         struct pedit_headers *cmd_masks;
3132         u8 cmd;
3133
3134         for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
3135                 cmd_masks = &parse_attr->hdrs[cmd].masks;
3136                 if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
3137                         NL_SET_ERR_MSG_MOD(extack, "attempt to offload an unsupported field");
3138                         netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd);
3139                         print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
3140                                        16, 1, cmd_masks, sizeof(zero_masks), true);
3141                         return -EOPNOTSUPP;
3142                 }
3143         }
3144
3145         return 0;
3146 }
3147
3148 static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
3149                                  struct mlx5e_tc_flow_parse_attr *parse_attr,
3150                                  u32 *action_flags,
3151                                  struct netlink_ext_ack *extack)
3152 {
3153         int err;
3154
3155         err = offload_pedit_fields(priv, namespace, parse_attr, action_flags, extack);
3156         if (err)
3157                 goto out_dealloc_parsed_actions;
3158
3159         err = verify_offload_pedit_fields(priv, parse_attr, extack);
3160         if (err)
3161                 goto out_dealloc_parsed_actions;
3162
3163         return 0;
3164
3165 out_dealloc_parsed_actions:
3166         mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
3167         return err;
3168 }
3169
3170 struct ip_ttl_word {
3171         __u8    ttl;
3172         __u8    protocol;
3173         __sum16 check;
3174 };
3175
3176 struct ipv6_hoplimit_word {
3177         __be16  payload_len;
3178         __u8    nexthdr;
3179         __u8    hop_limit;
3180 };
3181
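/* Classify a single mangle/add action: record whether it modifies IP header
 * fields other than ttl/hop_limit and whether it rewrites the tuple, and
 * reject tuple rewrites that are combined with a (non-clear) ct action.
 */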
3182 static bool
3183 is_action_keys_supported(const struct flow_action_entry *act, bool ct_flow,
3184                          bool *modify_ip_header, bool *modify_tuple,
3185                          struct netlink_ext_ack *extack)
3186 {
3187         u32 mask, offset;
3188         u8 htype;
3189
3190         htype = act->mangle.htype;
3191         offset = act->mangle.offset;
3192         mask = ~act->mangle.mask;
3193         /* For IPv4 and IPv6 headers, inspect the 4-byte word being
3194          * rewritten to determine whether the modified fields are
3195          * anything other than ttl / hop_limit.
3196          */
3197         if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) {
3198                 struct ip_ttl_word *ttl_word =
3199                         (struct ip_ttl_word *)&mask;
3200
3201                 if (offset != offsetof(struct iphdr, ttl) ||
3202                     ttl_word->protocol ||
3203                     ttl_word->check) {
3204                         *modify_ip_header = true;
3205                 }
3206
3207                 if (offset >= offsetof(struct iphdr, saddr))
3208                         *modify_tuple = true;
3209
3210                 if (ct_flow && *modify_tuple) {
3211                         NL_SET_ERR_MSG_MOD(extack,
3212                                            "can't offload re-write of ipv4 address with action ct");
3213                         return false;
3214                 }
3215         } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) {
3216                 struct ipv6_hoplimit_word *hoplimit_word =
3217                         (struct ipv6_hoplimit_word *)&mask;
3218
3219                 if (offset != offsetof(struct ipv6hdr, payload_len) ||
3220                     hoplimit_word->payload_len ||
3221                     hoplimit_word->nexthdr) {
3222                         *modify_ip_header = true;
3223                 }
3224
3225                 if (ct_flow && offset >= offsetof(struct ipv6hdr, saddr))
3226                         *modify_tuple = true;
3227
3228                 if (ct_flow && *modify_tuple) {
3229                         NL_SET_ERR_MSG_MOD(extack,
3230                                            "can't offload re-write of ipv6 address with action ct");
3231                         return false;
3232                 }
3233         } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_TCP ||
3234                    htype == FLOW_ACT_MANGLE_HDR_TYPE_UDP) {
3235                 *modify_tuple = true;
3236                 if (ct_flow) {
3237                         NL_SET_ERR_MSG_MOD(extack,
3238                                            "can't offload re-write of transport header ports with action ct");
3239                         return false;
3240                 }
3241         }
3242
3243         return true;
3244 }
3245
3246 static bool modify_tuple_supported(bool modify_tuple, bool ct_clear,
3247                                    bool ct_flow, struct netlink_ext_ack *extack,
3248                                    struct mlx5e_priv *priv,
3249                                    struct mlx5_flow_spec *spec)
3250 {
3251         if (!modify_tuple || ct_clear)
3252                 return true;
3253
3254         if (ct_flow) {
3255                 NL_SET_ERR_MSG_MOD(extack,
3256                                    "can't offload tuple modification with non-clear ct()");
3257                 netdev_info(priv->netdev,
3258                             "can't offload tuple modification with non-clear ct()");
3259                 return false;
3260         }
3261
3262         /* Add ct_state=-trk match so it will be offloaded for non ct flows
3263          * (or after clear action), as otherwise, since the tuple is changed,
3264          * we can't restore ct state
3265          */
3266         if (mlx5_tc_ct_add_no_trk_match(spec)) {
3267                 NL_SET_ERR_MSG_MOD(extack,
3268                                    "can't offload tuple modification with ct matches and no ct(clear) action");
3269                 netdev_info(priv->netdev,
3270                             "can't offload tuple modification with ct matches and no ct(clear) action");
3271                 return false;
3272         }
3273
3274         return true;
3275 }
3276
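/* Verify that the header-rewrite actions of this rule can be offloaded given
 * what the rule matches on: rules that do not match on IP are assumed to only
 * rewrite MACs, tuple rewrites are constrained by ct, and IP header rewrites
 * are only offloaded for TCP/UDP/ICMP flows.
 */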
3277 static bool modify_header_match_supported(struct mlx5e_priv *priv,
3278                                           struct mlx5_flow_spec *spec,
3279                                           struct flow_action *flow_action,
3280                                           u32 actions, bool ct_flow,
3281                                           bool ct_clear,
3282                                           struct netlink_ext_ack *extack)
3283 {
3284         const struct flow_action_entry *act;
3285         bool modify_ip_header, modify_tuple;
3286         void *headers_c;
3287         void *headers_v;
3288         u16 ethertype;
3289         u8 ip_proto;
3290         int i;
3291
3292         headers_c = mlx5e_get_match_headers_criteria(actions, spec);
3293         headers_v = mlx5e_get_match_headers_value(actions, spec);
3294         ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
3295
3296         /* for non-IP we only re-write MACs, so we're okay */
3297         if (MLX5_GET(fte_match_set_lyr_2_4, headers_c, ip_version) == 0 &&
3298             ethertype != ETH_P_IP && ethertype != ETH_P_IPV6)
3299                 goto out_ok;
3300
3301         modify_ip_header = false;
3302         modify_tuple = false;
3303         flow_action_for_each(i, act, flow_action) {
3304                 if (act->id != FLOW_ACTION_MANGLE &&
3305                     act->id != FLOW_ACTION_ADD)
3306                         continue;
3307
3308                 if (!is_action_keys_supported(act, ct_flow,
3309                                               &modify_ip_header,
3310                                               &modify_tuple, extack))
3311                         return false;
3312         }
3313
3314         if (!modify_tuple_supported(modify_tuple, ct_clear, ct_flow, extack,
3315                                     priv, spec))
3316                 return false;
3317
3318         ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
3319         if (modify_ip_header && ip_proto != IPPROTO_TCP &&
3320             ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) {
3321                 NL_SET_ERR_MSG_MOD(extack,
3322                                    "can't offload re-write of non TCP/UDP");
3323                 netdev_info(priv->netdev, "can't offload re-write of ip proto %d\n",
3324                             ip_proto);
3325                 return false;
3326         }
3327
3328 out_ok:
3329         return true;
3330 }
3331
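/* FDB-only action checks: mirroring (split rules) cannot be combined with ct
 * unless the device preserves reg_c across the split, and split rules require
 * firmware support for the forward FDB table.
 */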
3332 static bool
3333 actions_match_supported_fdb(struct mlx5e_priv *priv,
3334                             struct mlx5e_tc_flow_parse_attr *parse_attr,
3335                             struct mlx5e_tc_flow *flow,
3336                             struct netlink_ext_ack *extack)
3337 {
3338         struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
3339         bool ct_flow, ct_clear;
3340
3341         ct_clear = flow->attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
3342         ct_flow = flow_flag_test(flow, CT) && !ct_clear;
3343
3344         if (esw_attr->split_count && ct_flow &&
3345             !MLX5_CAP_GEN(esw_attr->in_mdev, reg_c_preserve)) {
3346                 /* All registers used by ct are cleared when using
3347                  * split rules.
3348                  */
3349                 NL_SET_ERR_MSG_MOD(extack, "Can't offload mirroring with action ct");
3350                 return false;
3351         }
3352
3353         if (esw_attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
3354                 NL_SET_ERR_MSG_MOD(extack,
3355                                    "current firmware doesn't support split rule for port mirroring");
3356                 netdev_warn_once(priv->netdev,
3357                                  "current firmware doesn't support split rule for port mirroring\n");
3358                 return false;
3359         }
3360
3361         return true;
3362 }
3363
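/* Sanity checks on the parsed action set: a rule must forward or drop (but
 * not both), drop cannot be combined with header rewrite, and any header
 * rewrite must be compatible with the rule's match (plus FDB-specific limits
 * for eswitch flows).
 */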
3364 static bool
3365 actions_match_supported(struct mlx5e_priv *priv,
3366                         struct flow_action *flow_action,
3367                         u32 actions,
3368                         struct mlx5e_tc_flow_parse_attr *parse_attr,
3369                         struct mlx5e_tc_flow *flow,
3370                         struct netlink_ext_ack *extack)
3371 {
3372         bool ct_flow, ct_clear;
3373
3374         ct_clear = flow->attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
3375         ct_flow = flow_flag_test(flow, CT) && !ct_clear;
3376
3377         if (!(actions &
3378               (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
3379                 NL_SET_ERR_MSG_MOD(extack, "Rule must have at least one forward/drop action");
3380                 return false;
3381         }
3382
3383         if (!(~actions &
3384               (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
3385                 NL_SET_ERR_MSG_MOD(extack, "Rule cannot support forward+drop action");
3386                 return false;
3387         }
3388
3389         if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
3390             actions & MLX5_FLOW_CONTEXT_ACTION_DROP) {
3391                 NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported");
3392                 return false;
3393         }
3406
3407         if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
3408             !modify_header_match_supported(priv, &parse_attr->spec, flow_action,
3409                                            actions, ct_flow, ct_clear, extack))
3410                 return false;
3411
3412         if (mlx5e_is_eswitch_flow(flow) &&
3413             !actions_match_supported_fdb(priv, parse_attr, flow, extack))
3414                 return false;
3415
3416         return true;
3417 }
3418
3419 static bool same_port_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
3420 {
3421         return priv->mdev == peer_priv->mdev;
3422 }
3423
3424 bool mlx5e_same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
3425 {
3426         struct mlx5_core_dev *fmdev, *pmdev;
3427         u64 fsystem_guid, psystem_guid;
3428
3429         fmdev = priv->mdev;
3430         pmdev = peer_priv->mdev;
3431
3432         fsystem_guid = mlx5_query_nic_system_image_guid(fmdev);
3433         psystem_guid = mlx5_query_nic_system_image_guid(pmdev);
3434
3435         return (fsystem_guid == psystem_guid);
3436 }
3437
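/* Build the modify-header action list from the parsed pedit set/add commands.
 * If every pedit turned out to be redundant, drop the MOD_HDR action and, for
 * FDB rules without VLAN push/pop, reset the split count as well.
 */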
3438 static int
3439 actions_prepare_mod_hdr_actions(struct mlx5e_priv *priv,
3440                                 struct mlx5e_tc_flow *flow,
3441                                 struct mlx5_flow_attr *attr,
3442                                 struct netlink_ext_ack *extack)
3443 {
3444         struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
3445         struct pedit_headers_action *hdrs = parse_attr->hdrs;
3446         enum mlx5_flow_namespace_type ns_type;
3447         int err;
3448
3449         if (!hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits &&
3450             !hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits)
3451                 return 0;
3452
3453         ns_type = mlx5e_get_flow_namespace(flow);
3454
3455         err = alloc_tc_pedit_action(priv, ns_type, parse_attr, &attr->action, extack);
3456         if (err)
3457                 return err;
3458
3459         if (parse_attr->mod_hdr_acts.num_actions > 0)
3460                 return 0;
3461
3462         /* In case all pedit actions are skipped, remove the MOD_HDR flag. */
3463         attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3464         mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
3465
3466         if (ns_type != MLX5_FLOW_NAMESPACE_FDB)
3467                 return 0;
3468
3469         if (!((attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
3470               (attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)))
3471                 attr->esw_attr->split_count = 0;
3472
3473         return 0;
3474 }
3475
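/* Duplicate a flow attr for a post-action table rule: copy the original attr,
 * give it a fresh parse_attr (sharing only the filter device) and clear the
 * action and flags so the caller can rebuild them.
 */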
3476 static struct mlx5_flow_attr*
3477 mlx5e_clone_flow_attr_for_post_act(struct mlx5_flow_attr *attr,
3478                                    enum mlx5_flow_namespace_type ns_type)
3479 {
3480         struct mlx5e_tc_flow_parse_attr *parse_attr;
3481         u32 attr_sz = ns_to_attr_sz(ns_type);
3482         struct mlx5_flow_attr *attr2;
3483
3484         attr2 = mlx5_alloc_flow_attr(ns_type);
3485         parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
3486         if (!attr2 || !parse_attr) {
3487                 kvfree(parse_attr);
3488                 kfree(attr2);
3489                 return NULL;
3490         }
3491
3492         memcpy(attr2, attr, attr_sz);
3493         INIT_LIST_HEAD(&attr2->list);
3494         parse_attr->filter_dev = attr->parse_attr->filter_dev;
3495         attr2->action = 0;
3496         attr2->flags = 0;
3497         attr2->parse_attr = parse_attr;
3498         return attr2;
3499 }
3500
3501 static struct mlx5_core_dev *
3502 get_flow_counter_dev(struct mlx5e_tc_flow *flow)
3503 {
3504         return mlx5e_is_eswitch_flow(flow) ? flow->attr->esw_attr->counter_dev : flow->priv->mdev;
3505 }
3506
3507 struct mlx5_flow_attr *
3508 mlx5e_tc_get_encap_attr(struct mlx5e_tc_flow *flow)
3509 {
3510         struct mlx5_esw_flow_attr *esw_attr;
3511         struct mlx5_flow_attr *attr;
3512         int i;
3513
3514         list_for_each_entry(attr, &flow->attrs, list) {
3515                 esw_attr = attr->esw_attr;
3516                 for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
3517                         if (esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP)
3518                                 return attr;
3519                 }
3520         }
3521
3522         return NULL;
3523 }
3524
3525 void
3526 mlx5e_tc_unoffload_flow_post_acts(struct mlx5e_tc_flow *flow)
3527 {
3528         struct mlx5e_post_act *post_act = get_post_action(flow->priv);
3529         struct mlx5_flow_attr *attr;
3530
3531         list_for_each_entry(attr, &flow->attrs, list) {
3532                 if (list_is_last(&attr->list, &flow->attrs))
3533                         break;
3534
3535                 mlx5e_tc_post_act_unoffload(post_act, attr->post_act_handle);
3536         }
3537 }
3538
3539 static void
3540 free_flow_post_acts(struct mlx5e_tc_flow *flow)
3541 {
3542         struct mlx5_core_dev *counter_dev = get_flow_counter_dev(flow);
3543         struct mlx5e_post_act *post_act = get_post_action(flow->priv);
3544         struct mlx5_flow_attr *attr, *tmp;
3545         bool vf_tun;
3546
3547         list_for_each_entry_safe(attr, tmp, &flow->attrs, list) {
3548                 if (list_is_last(&attr->list, &flow->attrs))
3549                         break;
3550
3551                 if (attr->post_act_handle)
3552                         mlx5e_tc_post_act_del(post_act, attr->post_act_handle);
3553
3554                 clean_encap_dests(flow->priv, flow, attr, &vf_tun);
3555
3556                 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
3557                         mlx5_fc_destroy(counter_dev, attr->counter);
3558
3559                 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
3560                         mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts);
3561                         if (attr->modify_hdr)
3562                                 mlx5_modify_header_dealloc(flow->priv->mdev, attr->modify_hdr);
3563                 }
3564
3565                 list_del(&attr->list);
3566                 kvfree(attr->parse_attr);
3567                 kfree(attr);
3568         }
3569 }
3570
3571 int
3572 mlx5e_tc_offload_flow_post_acts(struct mlx5e_tc_flow *flow)
3573 {
3574         struct mlx5e_post_act *post_act = get_post_action(flow->priv);
3575         struct mlx5_flow_attr *attr;
3576         int err = 0;
3577
3578         list_for_each_entry(attr, &flow->attrs, list) {
3579                 if (list_is_last(&attr->list, &flow->attrs))
3580                         break;
3581
3582                 err = mlx5e_tc_post_act_offload(post_act, attr->post_act_handle);
3583                 if (err)
3584                         break;
3585         }
3586
3587         return err;
3588 }
3589
3590 /* TC filter rule HW translation:
3591  *
3592  * +---------------------+
3593  * + ft prio (tc chain)  +
3594  * + original match      +
3595  * +---------------------+
3596  *           |
3597  *           | if multi table action
3598  *           |
3599  *           v
3600  * +---------------------+
3601  * + post act ft         |<----.
3602  * + match fte id        |     | split on multi table action
3603  * + do actions          |-----'
3604  * +---------------------+
3605  *           |
3606  *           |
3607  *           v
3608  * Do rest of the actions after last multi table action.
3609  */
3610 static int
3611 alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack)
3612 {
3613         struct mlx5e_post_act *post_act = get_post_action(flow->priv);
3614         struct mlx5_flow_attr *attr, *next_attr = NULL;
3615         struct mlx5e_post_act_handle *handle;
3616         bool vf_tun, encap_valid = true;
3617         int err;
3618
3619         /* The attrs list is walked in reverse parse order, as required:
3620          * the first entry is the last attribute that was parsed.
3621          */
3622         list_for_each_entry(attr, &flow->attrs, list) {
3623                 if (!next_attr) {
3624                         /* Set counter action on last post act rule. */
3625                         attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
3626                 } else {
3627                         err = mlx5e_tc_act_set_next_post_act(flow, attr, next_attr);
3628                         if (err)
3629                                 goto out_free;
3630                 }
3631
3632                 /* Don't add post_act rule for first attr (last in the list).
3633                  * It's being handled by the caller.
3634                  */
3635                 if (list_is_last(&attr->list, &flow->attrs))
3636                         break;
3637
3638                 err = set_encap_dests(flow->priv, flow, attr, extack, &encap_valid, &vf_tun);
3639                 if (err)
3640                         goto out_free;
3641
3642                 if (!encap_valid)
3643                         flow_flag_set(flow, SLOW);
3644
3645                 err = actions_prepare_mod_hdr_actions(flow->priv, flow, attr, extack);
3646                 if (err)
3647                         goto out_free;
3648
3649                 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
3650                         err = mlx5e_tc_add_flow_mod_hdr(flow->priv, flow, attr);
3651                         if (err)
3652                                 goto out_free;
3653                 }
3654
3655                 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
3656                         err = alloc_flow_attr_counter(get_flow_counter_dev(flow), attr);
3657                         if (err)
3658                                 goto out_free;
3659                 }
3660
3661                 handle = mlx5e_tc_post_act_add(post_act, attr);
3662                 if (IS_ERR(handle)) {
3663                         err = PTR_ERR(handle);
3664                         goto out_free;
3665                 }
3666
3667                 attr->post_act_handle = handle;
3668                 next_attr = attr;
3669         }
3670
3671         if (flow_flag_test(flow, SLOW))
3672                 goto out;
3673
3674         err = mlx5e_tc_offload_flow_post_acts(flow);
3675         if (err)
3676                 goto out_free;
3677
3678 out:
3679         return 0;
3680
3681 out_free:
3682         free_flow_post_acts(flow);
3683         return err;
3684 }
3685
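/* Parse the (reordered) TC actions into one or more flow attrs: each
 * multi-table action that is not last splits the rule, cloning a new attr for
 * the actions that follow; the extra attrs are then instantiated as
 * post-action rules by alloc_flow_post_acts().
 */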
3686 static int
3687 parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state,
3688                  struct flow_action *flow_action)
3689 {
3690         struct netlink_ext_ack *extack = parse_state->extack;
3691         struct mlx5e_tc_flow_action flow_action_reorder;
3692         struct mlx5e_tc_flow *flow = parse_state->flow;
3693         struct mlx5_flow_attr *attr = flow->attr;
3694         enum mlx5_flow_namespace_type ns_type;
3695         struct mlx5e_priv *priv = flow->priv;
3696         struct flow_action_entry *act, **_act;
3697         struct mlx5e_tc_act *tc_act;
3698         int err, i;
3699
3700         flow_action_reorder.num_entries = flow_action->num_entries;
3701         flow_action_reorder.entries = kcalloc(flow_action->num_entries,
3702                                               sizeof(flow_action), GFP_KERNEL);
3703         if (!flow_action_reorder.entries)
3704                 return -ENOMEM;
3705
3706         mlx5e_tc_act_reorder_flow_actions(flow_action, &flow_action_reorder);
3707
3708         ns_type = mlx5e_get_flow_namespace(flow);
3709         list_add(&attr->list, &flow->attrs);
3710
3711         flow_action_for_each(i, _act, &flow_action_reorder) {
3712                 act = *_act;
3713                 tc_act = mlx5e_tc_act_get(act->id, ns_type);
3714                 if (!tc_act) {
3715                         NL_SET_ERR_MSG_MOD(extack, "Not implemented offload action");
3716                         err = -EOPNOTSUPP;
3717                         goto out_free;
3718                 }
3719
3720                 if (!tc_act->can_offload(parse_state, act, i, attr)) {
3721                         err = -EOPNOTSUPP;
3722                         goto out_free;
3723                 }
3724
3725                 err = tc_act->parse_action(parse_state, act, priv, attr);
3726                 if (err)
3727                         goto out_free;
3728
3729                 parse_state->actions |= attr->action;
3730
3731                 /* Split attr for multi table act if not the last act. */
3732                 if (tc_act->is_multi_table_act &&
3733                     tc_act->is_multi_table_act(priv, act, attr) &&
3734                     i < flow_action_reorder.num_entries - 1) {
3735                         err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type);
3736                         if (err)
3737                                 goto out_free;
3738
3739                         attr = mlx5e_clone_flow_attr_for_post_act(flow->attr, ns_type);
3740                         if (!attr) {
3741                                 err = -ENOMEM;
3742                                 goto out_free;
3743                         }
3744
3745                         list_add(&attr->list, &flow->attrs);
3746                 }
3747         }
3748
3749         kfree(flow_action_reorder.entries);
3750
3751         err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type);
3752         if (err)
3753                 goto out_free_post_acts;
3754
3755         err = alloc_flow_post_acts(flow, extack);
3756         if (err)
3757                 goto out_free_post_acts;
3758
3759         return 0;
3760
3761 out_free:
3762         kfree(flow_action_reorder.entries);
3763 out_free_post_acts:
3764         free_flow_post_acts(flow);
3765
3766         return err;
3767 }
3768
3769 static int
3770 flow_action_supported(struct flow_action *flow_action,
3771                       struct netlink_ext_ack *extack)
3772 {
3773         if (!flow_action_has_entries(flow_action)) {
3774                 NL_SET_ERR_MSG_MOD(extack, "Flow action doesn't have any entries");
3775                 return -EINVAL;
3776         }
3777
3778         if (!flow_action_hw_stats_check(flow_action, extack,
3779                                         FLOW_ACTION_HW_STATS_DELAYED_BIT)) {
3780                 NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported");
3781                 return -EOPNOTSUPP;
3782         }
3783
3784         return 0;
3785 }
3786
3787 static int
3788 parse_tc_nic_actions(struct mlx5e_priv *priv,
3789                      struct flow_action *flow_action,
3790                      struct mlx5e_tc_flow *flow,
3791                      struct netlink_ext_ack *extack)
3792 {
3793         struct mlx5e_tc_act_parse_state *parse_state;
3794         struct mlx5e_tc_flow_parse_attr *parse_attr;
3795         struct mlx5_flow_attr *attr = flow->attr;
3796         int err;
3797
3798         err = flow_action_supported(flow_action, extack);
3799         if (err)
3800                 return err;
3801
3802         attr->nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
3803         parse_attr = attr->parse_attr;
3804         parse_state = &parse_attr->parse_state;
3805         mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack);
3806         parse_state->ct_priv = get_ct_priv(priv);
3807
3808         err = parse_tc_actions(parse_state, flow_action);
3809         if (err)
3810                 return err;
3811
3812         err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack);
3813         if (err)
3814                 return err;
3815
3816         if (!actions_match_supported(priv, flow_action, parse_state->actions,
3817                                      parse_attr, flow, extack))
3818                 return -EOPNOTSUPP;
3819
3820         return 0;
3821 }
3822
3823 static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv,
3824                                   struct net_device *peer_netdev)
3825 {
3826         struct mlx5e_priv *peer_priv;
3827
3828         peer_priv = netdev_priv(peer_netdev);
3829
3830         return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) &&
3831                 mlx5e_eswitch_vf_rep(priv->netdev) &&
3832                 mlx5e_eswitch_vf_rep(peer_netdev) &&
3833                 mlx5e_same_hw_devs(priv, peer_priv));
3834 }
3835
3836 static bool same_hw_reps(struct mlx5e_priv *priv,
3837                          struct net_device *peer_netdev)
3838 {
3839         struct mlx5e_priv *peer_priv;
3840
3841         peer_priv = netdev_priv(peer_netdev);
3842
3843         return mlx5e_eswitch_rep(priv->netdev) &&
3844                mlx5e_eswitch_rep(peer_netdev) &&
3845                mlx5e_same_hw_devs(priv, peer_priv);
3846 }
3847
3848 static bool is_lag_dev(struct mlx5e_priv *priv,
3849                        struct net_device *peer_netdev)
3850 {
3851         return ((mlx5_lag_is_sriov(priv->mdev) ||
3852                  mlx5_lag_is_multipath(priv->mdev)) &&
3853                  same_hw_reps(priv, peer_netdev));
3854 }
3855
3856 static bool is_multiport_eligible(struct mlx5e_priv *priv, struct net_device *out_dev)
3857 {
3858         if (mlx5e_eswitch_uplink_rep(out_dev) &&
3859             MLX5_CAP_PORT_SELECTION(priv->mdev, port_select_flow_table) &&
3860             MLX5_CAP_GEN(priv->mdev, create_lag_when_not_master_up))
3861                 return true;
3862
3863         return false;
3864 }
3865
3866 bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
3867                                     struct net_device *out_dev)
3868 {
3869         if (is_merged_eswitch_vfs(priv, out_dev))
3870                 return true;
3871
3872         if (is_multiport_eligible(priv, out_dev))
3873                 return true;
3874
3875         if (is_lag_dev(priv, out_dev))
3876                 return true;
3877
3878         return mlx5e_eswitch_rep(out_dev) &&
3879                same_port_devs(priv, netdev_priv(out_dev));
3880 }
3881
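/* Set up forwarding to an internal port: acquire the internal port mapping,
 * rewrite the vport metadata register to its value, and send the packet back
 * to the root FDB so it is re-matched with the new source vport.
 */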
3882 int mlx5e_set_fwd_to_int_port_actions(struct mlx5e_priv *priv,
3883                                       struct mlx5_flow_attr *attr,
3884                                       int ifindex,
3885                                       enum mlx5e_tc_int_port_type type,
3886                                       u32 *action,
3887                                       int out_index)
3888 {
3889         struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
3890         struct mlx5e_tc_int_port_priv *int_port_priv;
3891         struct mlx5e_tc_flow_parse_attr *parse_attr;
3892         struct mlx5e_tc_int_port *dest_int_port;
3893         int err;
3894
3895         parse_attr = attr->parse_attr;
3896         int_port_priv = mlx5e_get_int_port_priv(priv);
3897
3898         dest_int_port = mlx5e_tc_int_port_get(int_port_priv, ifindex, type);
3899         if (IS_ERR(dest_int_port))
3900                 return PTR_ERR(dest_int_port);
3901
3902         err = mlx5e_tc_match_to_reg_set(priv->mdev, &parse_attr->mod_hdr_acts,
3903                                         MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG,
3904                                         mlx5e_tc_int_port_get_metadata(dest_int_port));
3905         if (err) {
3906                 mlx5e_tc_int_port_put(int_port_priv, dest_int_port);
3907                 return err;
3908         }
3909
3910         *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3911
3912         esw_attr->dest_int_port = dest_int_port;
3913         esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
3914
3915         /* Forward to root fdb for matching against the new source vport */
3916         attr->dest_chain = 0;
3917
3918         return 0;
3919 }
3920
3921 static int
3922 parse_tc_fdb_actions(struct mlx5e_priv *priv,
3923                      struct flow_action *flow_action,
3924                      struct mlx5e_tc_flow *flow,
3925                      struct netlink_ext_ack *extack)
3926 {
3927         struct mlx5e_tc_act_parse_state *parse_state;
3928         struct mlx5e_tc_flow_parse_attr *parse_attr;
3929         struct mlx5_flow_attr *attr = flow->attr;
3930         struct mlx5_esw_flow_attr *esw_attr;
3931         int err;
3932
3933         err = flow_action_supported(flow_action, extack);
3934         if (err)
3935                 return err;
3936
3937         esw_attr = attr->esw_attr;
3938         parse_attr = attr->parse_attr;
3939         parse_state = &parse_attr->parse_state;
3940         mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack);
3941         parse_state->ct_priv = get_ct_priv(priv);
3942
3943         err = parse_tc_actions(parse_state, flow_action);
3944         if (err)
3945                 return err;
3946
3947         /* Forward to/from internal port can only have 1 dest */
3948         if ((netif_is_ovs_master(parse_attr->filter_dev) || esw_attr->dest_int_port) &&
3949             esw_attr->out_count > 1) {
3950                 NL_SET_ERR_MSG_MOD(extack,
3951                                    "Rules with internal port can have only one destination");
3952                 return -EOPNOTSUPP;
3953         }
3954
3955         err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack);
3956         if (err)
3957                 return err;
3958
3959         if (!actions_match_supported(priv, flow_action, parse_state->actions,
3960                                      parse_attr, flow, extack))
3961                 return -EOPNOTSUPP;
3962
3963         return 0;
3964 }
3965
3966 static void get_flags(int flags, unsigned long *flow_flags)
3967 {
3968         unsigned long __flow_flags = 0;
3969
3970         if (flags & MLX5_TC_FLAG(INGRESS))
3971                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS);
3972         if (flags & MLX5_TC_FLAG(EGRESS))
3973                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS);
3974
3975         if (flags & MLX5_TC_FLAG(ESW_OFFLOAD))
3976                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
3977         if (flags & MLX5_TC_FLAG(NIC_OFFLOAD))
3978                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
3979         if (flags & MLX5_TC_FLAG(FT_OFFLOAD))
3980                 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_FT);
3981
3982         *flow_flags = __flow_flags;
3983 }
3984
3985 static const struct rhashtable_params tc_ht_params = {
3986         .head_offset = offsetof(struct mlx5e_tc_flow, node),
3987         .key_offset = offsetof(struct mlx5e_tc_flow, cookie),
3988         .key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie),
3989         .automatic_shrinking = true,
3990 };
3991
3992 static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv,
3993                                     unsigned long flags)
3994 {
3995         struct mlx5e_rep_priv *rpriv;
3996
3997         if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) {
3998                 rpriv = priv->ppriv;
3999                 return &rpriv->tc_ht;
4000         } else /* NIC offload */
4001                 return &priv->fs.tc.ht;
4002 }
4003
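/* A duplicate ("peer") rule is needed on the other eswitch when the devices
 * are paired via devcom and the rule may see traffic on either port: ingress
 * on a non-uplink rep, or an encap action, under SR-IOV LAG or multipath.
 */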
4004 static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
4005 {
4006         struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
4007         struct mlx5_flow_attr *attr = flow->attr;
4008         bool is_rep_ingress = esw_attr->in_rep->vport != MLX5_VPORT_UPLINK &&
4009                 flow_flag_test(flow, INGRESS);
4010         bool act_is_encap = !!(attr->action &
4011                                MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT);
4012         bool esw_paired = mlx5_devcom_is_paired(esw_attr->in_mdev->priv.devcom,
4013                                                 MLX5_DEVCOM_ESW_OFFLOADS);
4014
4015         if (!esw_paired)
4016                 return false;
4017
4018         if ((mlx5_lag_is_sriov(esw_attr->in_mdev) ||
4019              mlx5_lag_is_multipath(esw_attr->in_mdev)) &&
4020             (is_rep_ingress || act_is_encap))
4021                 return true;
4022
4023         return false;
4024 }
4025
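/* Allocate a flow attr followed by its namespace-specific extension
 * (mlx5_esw_flow_attr for FDB, mlx5_nic_flow_attr otherwise).
 */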
4026 struct mlx5_flow_attr *
4027 mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type)
4028 {
4029         u32 ex_attr_size = (type == MLX5_FLOW_NAMESPACE_FDB)  ?
4030                                 sizeof(struct mlx5_esw_flow_attr) :
4031                                 sizeof(struct mlx5_nic_flow_attr);
4032         struct mlx5_flow_attr *attr;
4033
4034         attr = kzalloc(sizeof(*attr) + ex_attr_size, GFP_KERNEL);
4035         if (!attr)
4036                 return attr;
4037
4038         INIT_LIST_HEAD(&attr->list);
4039         return attr;
4040 }
4041
4042 static int
4043 mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
4044                  struct flow_cls_offload *f, unsigned long flow_flags,
4045                  struct mlx5e_tc_flow_parse_attr **__parse_attr,
4046                  struct mlx5e_tc_flow **__flow)
4047 {
4048         struct mlx5e_tc_flow_parse_attr *parse_attr;
4049         struct mlx5_flow_attr *attr;
4050         struct mlx5e_tc_flow *flow;
4051         int err = -ENOMEM;
4052         int out_index;
4053
4054         flow = kzalloc(sizeof(*flow), GFP_KERNEL);
4055         parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
4056         if (!parse_attr || !flow)
4057                 goto err_free;
4058
4059         flow->flags = flow_flags;
4060         flow->cookie = f->cookie;
4061         flow->priv = priv;
4062
4063         attr = mlx5_alloc_flow_attr(mlx5e_get_flow_namespace(flow));
4064         if (!attr)
4065                 goto err_free;
4066
4067         flow->attr = attr;
4068
4069         for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
4070                 INIT_LIST_HEAD(&flow->encaps[out_index].list);
4071         INIT_LIST_HEAD(&flow->hairpin);
4072         INIT_LIST_HEAD(&flow->l3_to_l2_reformat);
4073         INIT_LIST_HEAD(&flow->attrs);
4074         refcount_set(&flow->refcnt, 1);
4075         init_completion(&flow->init_done);
4076         init_completion(&flow->del_hw_done);
4077
4078         *__flow = flow;
4079         *__parse_attr = parse_attr;
4080
4081         return 0;
4082
4083 err_free:
4084         kfree(flow);
4085         kvfree(parse_attr);
4086         return err;
4087 }
4088
4089 static void
4090 mlx5e_flow_attr_init(struct mlx5_flow_attr *attr,
4091                      struct mlx5e_tc_flow_parse_attr *parse_attr,
4092                      struct flow_cls_offload *f)
4093 {
4094         attr->parse_attr = parse_attr;
4095         attr->chain = f->common.chain_index;
4096         attr->prio = f->common.prio;
4097 }
4098
4099 static void
4100 mlx5e_flow_esw_attr_init(struct mlx5_flow_attr *attr,
4101                          struct mlx5e_priv *priv,
4102                          struct mlx5e_tc_flow_parse_attr *parse_attr,
4103                          struct flow_cls_offload *f,
4104                          struct mlx5_eswitch_rep *in_rep,
4105                          struct mlx5_core_dev *in_mdev)
4106 {
4107         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4108         struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
4109
4110         mlx5e_flow_attr_init(attr, parse_attr, f);
4111
4112         esw_attr->in_rep = in_rep;
4113         esw_attr->in_mdev = in_mdev;
4114
4115         if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) ==
4116             MLX5_COUNTER_SOURCE_ESWITCH)
4117                 esw_attr->counter_dev = in_mdev;
4118         else
4119                 esw_attr->counter_dev = priv->mdev;
4120 }
4121
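/* Build and offload an eswitch (FDB) flow: parse the flower match and
 * CT matches, parse the FDB actions, and install the rule in hardware.
 * A multipath flow that fails with -ENETUNREACH is kept on the unready
 * list and retried later.
 */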
4122 static struct mlx5e_tc_flow *
4123 __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
4124                      struct flow_cls_offload *f,
4125                      unsigned long flow_flags,
4126                      struct net_device *filter_dev,
4127                      struct mlx5_eswitch_rep *in_rep,
4128                      struct mlx5_core_dev *in_mdev)
4129 {
4130         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
4131         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4132         struct netlink_ext_ack *extack = f->common.extack;
4133         struct mlx5e_tc_flow_parse_attr *parse_attr;
4134         struct mlx5e_tc_flow *flow;
4135         int attr_size, err;
4136
4137         flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
4138         attr_size  = sizeof(struct mlx5_esw_flow_attr);
4139         err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
4140                                &parse_attr, &flow);
4141         if (err)
4142                 goto out;
4143
4144         parse_attr->filter_dev = filter_dev;
4145         mlx5e_flow_esw_attr_init(flow->attr,
4146                                  priv, parse_attr,
4147                                  f, in_rep, in_mdev);
4148
4149         err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
4150                                f, filter_dev);
4151         if (err)
4152                 goto err_free;
4153
4154         /* actions validation depends on parsing the ct matches first */
4155         err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
4156                                    &flow->attr->ct_attr, extack);
4157         if (err)
4158                 goto err_free;
4159
4160         /* always set IP version for indirect table handling */
4161         flow->attr->ip_version = mlx5e_tc_get_ip_version(&parse_attr->spec, true);
4162
4163         err = parse_tc_fdb_actions(priv, &rule->action, flow, extack);
4164         if (err)
4165                 goto err_free;
4166
4167         if (flow->attr->lag.count) {
4168                 err = mlx5_lag_add_mpesw_rule(esw->dev);
4169                 if (err)
4170                         goto err_free;
4171         }
4172
4173         err = mlx5e_tc_add_fdb_flow(priv, flow, extack);
4174         complete_all(&flow->init_done);
4175         if (err) {
4176                 if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev)))
4177                         goto err_lag;
4178
4179                 add_unready_flow(flow);
4180         }
4181
4182         return flow;
4183
4184 err_lag:
4185         if (flow->attr->lag.count)
4186                 mlx5_lag_del_mpesw_rule(esw->dev);
4187 err_free:
4188         mlx5e_flow_put(priv, flow);
4189 out:
4190         return ERR_PTR(err);
4191 }
4192
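/* Offload a duplicate of the flow on the paired (peer) eswitch and
 * link the two flows together via flow->peer_flow and the eswitch
 * peer_flows list.
 */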
4193 static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
4194                                       struct mlx5e_tc_flow *flow,
4195                                       unsigned long flow_flags)
4196 {
4197         struct mlx5e_priv *priv = flow->priv, *peer_priv;
4198         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
4199         struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
4200         struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
4201         struct mlx5e_tc_flow_parse_attr *parse_attr;
4202         struct mlx5e_rep_priv *peer_urpriv;
4203         struct mlx5e_tc_flow *peer_flow;
4204         struct mlx5_core_dev *in_mdev;
4205         int err = 0;
4206
4207         peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4208         if (!peer_esw)
4209                 return -ENODEV;
4210
4211         peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH);
4212         peer_priv = netdev_priv(peer_urpriv->netdev);
4213
4214         /* in_mdev is assigned based on where the packet originated:
4215          * packets redirected to the uplink use the same mdev as the
4216          * original flow, while packets redirected from the uplink use
4217          * the peer mdev.
4218          */
4219         if (attr->in_rep->vport == MLX5_VPORT_UPLINK)
4220                 in_mdev = peer_priv->mdev;
4221         else
4222                 in_mdev = priv->mdev;
4223
4224         parse_attr = flow->attr->parse_attr;
4225         peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags,
4226                                          parse_attr->filter_dev,
4227                                          attr->in_rep, in_mdev);
4228         if (IS_ERR(peer_flow)) {
4229                 err = PTR_ERR(peer_flow);
4230                 goto out;
4231         }
4232
4233         flow->peer_flow = peer_flow;
4234         flow_flag_set(flow, DUP);
4235         mutex_lock(&esw->offloads.peer_mutex);
4236         list_add_tail(&flow->peer, &esw->offloads.peer_flows);
4237         mutex_unlock(&esw->offloads.peer_mutex);
4238
4239 out:
4240         mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4241         return err;
4242 }
4243
4244 static int
4245 mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
4246                    struct flow_cls_offload *f,
4247                    unsigned long flow_flags,
4248                    struct net_device *filter_dev,
4249                    struct mlx5e_tc_flow **__flow)
4250 {
4251         struct mlx5e_rep_priv *rpriv = priv->ppriv;
4252         struct mlx5_eswitch_rep *in_rep = rpriv->rep;
4253         struct mlx5_core_dev *in_mdev = priv->mdev;
4254         struct mlx5e_tc_flow *flow;
4255         int err;
4256
4257         flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep,
4258                                     in_mdev);
4259         if (IS_ERR(flow))
4260                 return PTR_ERR(flow);
4261
4262         if (is_peer_flow_needed(flow)) {
4263                 err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags);
4264                 if (err) {
4265                         mlx5e_tc_del_fdb_flow(priv, flow);
4266                         goto out;
4267                 }
4268         }
4269
4270         *__flow = flow;
4271
4272         return 0;
4273
4274 out:
4275         return err;
4276 }
4277
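/* Build and offload a NIC mode flow: parse the flower match, the CT
 * matches and the NIC actions, then install the rule in the NIC tc
 * tables.
 */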
4278 static int
4279 mlx5e_add_nic_flow(struct mlx5e_priv *priv,
4280                    struct flow_cls_offload *f,
4281                    unsigned long flow_flags,
4282                    struct net_device *filter_dev,
4283                    struct mlx5e_tc_flow **__flow)
4284 {
4285         struct flow_rule *rule = flow_cls_offload_flow_rule(f);
4286         struct netlink_ext_ack *extack = f->common.extack;
4287         struct mlx5e_tc_flow_parse_attr *parse_attr;
4288         struct mlx5e_tc_flow *flow;
4289         int attr_size, err;
4290
4291         if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
4292                 if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common))
4293                         return -EOPNOTSUPP;
4294         } else if (!tc_can_offload_extack(priv->netdev, f->common.extack)) {
4295                 return -EOPNOTSUPP;
4296         }
4297
4298         flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
4299         attr_size  = sizeof(struct mlx5_nic_flow_attr);
4300         err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
4301                                &parse_attr, &flow);
4302         if (err)
4303                 goto out;
4304
4305         parse_attr->filter_dev = filter_dev;
4306         mlx5e_flow_attr_init(flow->attr, parse_attr, f);
4307
4308         err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
4309                                f, filter_dev);
4310         if (err)
4311                 goto err_free;
4312
4313         err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
4314                                    &flow->attr->ct_attr, extack);
4315         if (err)
4316                 goto err_free;
4317
4318         err = parse_tc_nic_actions(priv, &rule->action, flow, extack);
4319         if (err)
4320                 goto err_free;
4321
4322         err = mlx5e_tc_add_nic_flow(priv, flow, extack);
4323         if (err)
4324                 goto err_free;
4325
4326         flow_flag_set(flow, OFFLOADED);
4327         *__flow = flow;
4328
4329         return 0;
4330
4331 err_free:
4332         flow_flag_set(flow, FAILED);
4333         mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
4334         mlx5e_flow_put(priv, flow);
4335 out:
4336         return err;
4337 }
4338
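/* Common entry point for adding a tc flow: translate the offload flags
 * and dispatch to the FDB path when the eswitch is in offloads mode,
 * or to the NIC path otherwise.
 */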
4339 static int
4340 mlx5e_tc_add_flow(struct mlx5e_priv *priv,
4341                   struct flow_cls_offload *f,
4342                   unsigned long flags,
4343                   struct net_device *filter_dev,
4344                   struct mlx5e_tc_flow **flow)
4345 {
4346         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4347         unsigned long flow_flags;
4348         int err;
4349
4350         get_flags(flags, &flow_flags);
4351
4352         if (!tc_can_offload_extack(priv->netdev, f->common.extack))
4353                 return -EOPNOTSUPP;
4354
4355         if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
4356                 err = mlx5e_add_fdb_flow(priv, f, flow_flags,
4357                                          filter_dev, flow);
4358         else
4359                 err = mlx5e_add_nic_flow(priv, f, flow_flags,
4360                                          filter_dev, flow);
4361
4362         return err;
4363 }
4364
4365 static bool is_flow_rule_duplicate_allowed(struct net_device *dev,
4366                                            struct mlx5e_rep_priv *rpriv)
4367 {
4368         /* An offloaded flow rule is allowed to be duplicated on a non-uplink
4369          * representor sharing a tc block with other slaves of a lag device.
4370          * rpriv can be NULL if this function is called from NIC mode.
4371          */
4372         return netif_is_lag_port(dev) && rpriv && rpriv->rep->vport != MLX5_VPORT_UPLINK;
4373 }
4374
4375 int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
4376                            struct flow_cls_offload *f, unsigned long flags)
4377 {
4378         struct netlink_ext_ack *extack = f->common.extack;
4379         struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4380         struct mlx5e_rep_priv *rpriv = priv->ppriv;
4381         struct mlx5e_tc_flow *flow;
4382         int err = 0;
4383
4384         if (!mlx5_esw_hold(priv->mdev))
4385                 return -EAGAIN;
4386
4387         mlx5_esw_get(priv->mdev);
4388
4389         rcu_read_lock();
4390         flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
4391         if (flow) {
4392                 /* The same flow rule is already offloaded to a non-uplink
4393                  * representor sharing the tc block; just return 0.
4394                  */
4395                 if (is_flow_rule_duplicate_allowed(dev, rpriv) && flow->orig_dev != dev)
4396                         goto rcu_unlock;
4397
4398                 NL_SET_ERR_MSG_MOD(extack,
4399                                    "flow cookie already exists, ignoring");
4400                 netdev_warn_once(priv->netdev,
4401                                  "flow cookie %lx already exists, ignoring\n",
4402                                  f->cookie);
4403                 err = -EEXIST;
4404                 goto rcu_unlock;
4405         }
4406 rcu_unlock:
4407         rcu_read_unlock();
4408         if (flow)
4409                 goto out;
4410
4411         trace_mlx5e_configure_flower(f);
4412         err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow);
4413         if (err)
4414                 goto out;
4415
4416         /* The flow rule was offloaded to a non-uplink representor sharing
4417          * the tc block; record the flow's owner dev.
4418          */
4419         if (is_flow_rule_duplicate_allowed(dev, rpriv))
4420                 flow->orig_dev = dev;
4421
4422         err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params);
4423         if (err)
4424                 goto err_free;
4425
4426         mlx5_esw_release(priv->mdev);
4427         return 0;
4428
4429 err_free:
4430         mlx5e_flow_put(priv, flow);
4431 out:
4432         mlx5_esw_put(priv->mdev);
4433         mlx5_esw_release(priv->mdev);
4434         return err;
4435 }
4436
4437 static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags)
4438 {
4439         bool dir_ingress = !!(flags & MLX5_TC_FLAG(INGRESS));
4440         bool dir_egress = !!(flags & MLX5_TC_FLAG(EGRESS));
4441
4442         return flow_flag_test(flow, INGRESS) == dir_ingress &&
4443                 flow_flag_test(flow, EGRESS) == dir_egress;
4444 }
4445
4446 int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
4447                         struct flow_cls_offload *f, unsigned long flags)
4448 {
4449         struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4450         struct mlx5e_tc_flow *flow;
4451         int err;
4452
4453         rcu_read_lock();
4454         flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
4455         if (!flow || !same_flow_direction(flow, flags)) {
4456                 err = -EINVAL;
4457                 goto errout;
4458         }
4459
4460         /* Only delete the flow if it doesn't have the MLX5E_TC_FLOW_DELETED
4461          * flag set.
4462          */
4463         if (flow_flag_test_and_set(flow, DELETED)) {
4464                 err = -EINVAL;
4465                 goto errout;
4466         }
4467         rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params);
4468         rcu_read_unlock();
4469
4470         trace_mlx5e_delete_flower(f);
4471         mlx5e_flow_put(priv, flow);
4472
4473         mlx5_esw_put(priv->mdev);
4474         return 0;
4475
4476 errout:
4477         rcu_read_unlock();
4478         return err;
4479 }
4480
4481 int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
4482                        struct flow_cls_offload *f, unsigned long flags)
4483 {
4484         struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
4485         struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4486         struct mlx5_eswitch *peer_esw;
4487         struct mlx5e_tc_flow *flow;
4488         struct mlx5_fc *counter;
4489         u64 lastuse = 0;
4490         u64 packets = 0;
4491         u64 bytes = 0;
4492         int err = 0;
4493
4494         rcu_read_lock();
4495         flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie,
4496                                                 tc_ht_params));
4497         rcu_read_unlock();
4498         if (IS_ERR(flow))
4499                 return PTR_ERR(flow);
4500
4501         if (!same_flow_direction(flow, flags)) {
4502                 err = -EINVAL;
4503                 goto errout;
4504         }
4505
4506         if (mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, CT)) {
4507                 counter = mlx5e_tc_get_counter(flow);
4508                 if (!counter)
4509                         goto errout;
4510
4511                 mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
4512         }
4513
4514         /* Under multipath it's possible for one rule to be currently
4515          * un-offloaded while the other rule is offloaded.
4516          */
4517         peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4518         if (!peer_esw)
4519                 goto out;
4520
4521         if (flow_flag_test(flow, DUP) &&
4522             flow_flag_test(flow->peer_flow, OFFLOADED)) {
4523                 u64 bytes2;
4524                 u64 packets2;
4525                 u64 lastuse2;
4526
4527                 counter = mlx5e_tc_get_counter(flow->peer_flow);
4528                 if (!counter)
4529                         goto no_peer_counter;
4530                 mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2);
4531
4532                 bytes += bytes2;
4533                 packets += packets2;
4534                 lastuse = max_t(u64, lastuse, lastuse2);
4535         }
4536
4537 no_peer_counter:
4538         mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4539 out:
4540         flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
4541                           FLOW_ACTION_HW_STATS_DELAYED);
4542         trace_mlx5e_stats_flower(f);
4543 errout:
4544         mlx5e_flow_put(priv, flow);
4545         return err;
4546 }
4547
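/* Program the matchall police rate as an ingress rate limit on the
 * representor's vport. A rate of 0 clears the limit (used when the
 * matchall rule is deleted).
 */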
4548 static int apply_police_params(struct mlx5e_priv *priv, u64 rate,
4549                                struct netlink_ext_ack *extack)
4550 {
4551         struct mlx5e_rep_priv *rpriv = priv->ppriv;
4552         struct mlx5_eswitch *esw;
4553         u32 rate_mbps = 0;
4554         u16 vport_num;
4555         int err;
4556
4557         vport_num = rpriv->rep->vport;
4558         if (vport_num >= MLX5_VPORT_ECPF) {
4559                 NL_SET_ERR_MSG_MOD(extack,
4560                                    "Ingress rate limit is supported only for Eswitch ports connected to VFs");
4561                 return -EOPNOTSUPP;
4562         }
4563
4564         esw = priv->mdev->priv.eswitch;
4565         /* The rate is given in bytes/sec.
4566          * First convert it to bits/sec and then round to the nearest Mbit/sec,
4567          * where Mbit means a million bits.
4568          * Moreover, if the rate is non-zero we choose to configure a minimum of
4569          * 1 Mbit/sec.
4570          */
4571         if (rate) {
4572                 rate = (rate * BITS_PER_BYTE) + 500000;
4573                 do_div(rate, 1000000);
4574                 rate_mbps = max_t(u32, rate, 1);
4575         }
4576
4577         err = mlx5_esw_qos_modify_vport_rate(esw, vport_num, rate_mbps);
4578         if (err)
4579                 NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware");
4580
4581         return err;
4582 }
4583
4584 int mlx5e_policer_validate(const struct flow_action *action,
4585                            const struct flow_action_entry *act,
4586                            struct netlink_ext_ack *extack)
4587 {
4588         if (act->police.exceed.act_id != FLOW_ACTION_DROP) {
4589                 NL_SET_ERR_MSG_MOD(extack,
4590                                    "Offload not supported when exceed action is not drop");
4591                 return -EOPNOTSUPP;
4592         }
4593
4594         if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT &&
4595             !flow_action_is_last_entry(action, act)) {
4596                 NL_SET_ERR_MSG_MOD(extack,
4597                                    "Offload not supported when conform action is ok, but action is not last");
4598                 return -EOPNOTSUPP;
4599         }
4600
4601         if (act->police.peakrate_bytes_ps ||
4602             act->police.avrate || act->police.overhead) {
4603                 NL_SET_ERR_MSG_MOD(extack,
4604                                    "Offload not supported when peakrate/avrate/overhead is configured");
4605                 return -EOPNOTSUPP;
4606         }
4607
4608         if (act->police.rate_pkt_ps) {
4609                 NL_SET_ERR_MSG_MOD(extack,
4610                                    "QoS offload doesn't support packets per second");
4611                 return -EOPNOTSUPP;
4612         }
4613
4614         return 0;
4615 }
4616
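/* Validate a matchall rule: only a single police action with basic HW
 * stats is supported, and the conform action must be continue. Apply
 * the policer rate to the vport and snapshot its stats.
 */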
4617 static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
4618                                         struct flow_action *flow_action,
4619                                         struct netlink_ext_ack *extack)
4620 {
4621         struct mlx5e_rep_priv *rpriv = priv->ppriv;
4622         const struct flow_action_entry *act;
4623         int err;
4624         int i;
4625
4626         if (!flow_action_has_entries(flow_action)) {
4627                 NL_SET_ERR_MSG_MOD(extack, "matchall called with no action");
4628                 return -EINVAL;
4629         }
4630
4631         if (!flow_offload_has_one_action(flow_action)) {
4632                 NL_SET_ERR_MSG_MOD(extack, "matchall policing supports only a single action");
4633                 return -EOPNOTSUPP;
4634         }
4635
4636         if (!flow_action_basic_hw_stats_check(flow_action, extack)) {
4637                 NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported");
4638                 return -EOPNOTSUPP;
4639         }
4640
4641         flow_action_for_each(i, act, flow_action) {
4642                 switch (act->id) {
4643                 case FLOW_ACTION_POLICE:
4644                         if (act->police.notexceed.act_id != FLOW_ACTION_CONTINUE) {
4645                                 NL_SET_ERR_MSG_MOD(extack,
4646                                                    "Offload not supported when conform action is not continue");
4647                                 return -EOPNOTSUPP;
4648                         }
4649
4650                         err = mlx5e_policer_validate(flow_action, act, extack);
4651                         if (err)
4652                                 return err;
4653
4654                         err = apply_police_params(priv, act->police.rate_bytes_ps, extack);
4655                         if (err)
4656                                 return err;
4657
4658                         rpriv->prev_vf_vport_stats = priv->stats.vf_vport;
4659                         break;
4660                 default:
4661                         NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall");
4662                         return -EOPNOTSUPP;
4663                 }
4664         }
4665
4666         return 0;
4667 }
4668
4669 int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv,
4670                                 struct tc_cls_matchall_offload *ma)
4671 {
4672         struct netlink_ext_ack *extack = ma->common.extack;
4673
4674         if (ma->common.prio != 1) {
4675                 NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported");
4676                 return -EINVAL;
4677         }
4678
4679         return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack);
4680 }
4681
4682 int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv,
4683                              struct tc_cls_matchall_offload *ma)
4684 {
4685         struct netlink_ext_ack *extack = ma->common.extack;
4686
4687         return apply_police_params(priv, 0, extack);
4688 }
4689
4690 void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv,
4691                              struct tc_cls_matchall_offload *ma)
4692 {
4693         struct mlx5e_rep_priv *rpriv = priv->ppriv;
4694         struct rtnl_link_stats64 cur_stats;
4695         u64 dbytes;
4696         u64 dpkts;
4697
4698         cur_stats = priv->stats.vf_vport;
4699         dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets;
4700         dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes;
4701         rpriv->prev_vf_vport_stats = cur_stats;
4702         flow_stats_update(&ma->stats, dbytes, dpkts, 0, jiffies,
4703                           FLOW_ACTION_HW_STATS_DELAYED);
4704 }
4705
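/* A peer device on the same HW is going away: walk the hairpin table
 * and notify every hairpin pair towards that peer's vhca id that its
 * peer is dead.
 */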
4706 static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
4707                                               struct mlx5e_priv *peer_priv)
4708 {
4709         struct mlx5_core_dev *peer_mdev = peer_priv->mdev;
4710         struct mlx5e_hairpin_entry *hpe, *tmp;
4711         LIST_HEAD(init_wait_list);
4712         u16 peer_vhca_id;
4713         int bkt;
4714
4715         if (!mlx5e_same_hw_devs(priv, peer_priv))
4716                 return;
4717
4718         peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
4719
4720         mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
4721         hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist)
4722                 if (refcount_inc_not_zero(&hpe->refcnt))
4723                         list_add(&hpe->dead_peer_wait_list, &init_wait_list);
4724         mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
4725
4726         list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) {
4727                 wait_for_completion(&hpe->res_ready);
4728                 if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id)
4729                         mlx5_core_hairpin_clear_dead_peer(hpe->hp->pair);
4730
4731                 mlx5e_hairpin_put(priv, hpe);
4732         }
4733 }
4734
4735 static int mlx5e_tc_netdev_event(struct notifier_block *this,
4736                                  unsigned long event, void *ptr)
4737 {
4738         struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
4739         struct mlx5e_flow_steering *fs;
4740         struct mlx5e_priv *peer_priv;
4741         struct mlx5e_tc_table *tc;
4742         struct mlx5e_priv *priv;
4743
4744         if (ndev->netdev_ops != &mlx5e_netdev_ops ||
4745             event != NETDEV_UNREGISTER ||
4746             ndev->reg_state == NETREG_REGISTERED)
4747                 return NOTIFY_DONE;
4748
4749         tc = container_of(this, struct mlx5e_tc_table, netdevice_nb);
4750         fs = container_of(tc, struct mlx5e_flow_steering, tc);
4751         priv = container_of(fs, struct mlx5e_priv, fs);
4752         peer_priv = netdev_priv(ndev);
4753         if (priv == peer_priv ||
4754             !(priv->netdev->features & NETIF_F_HW_TC))
4755                 return NOTIFY_DONE;
4756
4757         mlx5e_tc_hairpin_update_dead_peer(priv, peer_priv);
4758
4759         return NOTIFY_DONE;
4760 }
4761
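/* Size the NIC tc flow table: the group size is bounded by the number
 * of flow counters, and the total size by the device's log_max_ft_size.
 */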
4762 static int mlx5e_tc_nic_get_ft_size(struct mlx5_core_dev *dev)
4763 {
4764         int tc_grp_size, tc_tbl_size;
4765         u32 max_flow_counter;
4766
4767         max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
4768                             MLX5_CAP_GEN(dev, max_flow_counter_15_0);
4769
4770         tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE);
4771
4772         tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS,
4773                             BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size)));
4774
4775         return tc_tbl_size;
4776 }
4777
4778 static int mlx5e_tc_nic_create_miss_table(struct mlx5e_priv *priv)
4779 {
4780         struct mlx5_flow_table **ft = &priv->fs.tc.miss_t;
4781         struct mlx5_flow_table_attr ft_attr = {};
4782         struct mlx5_flow_namespace *ns;
4783         int err = 0;
4784
4785         ft_attr.max_fte = 1;
4786         ft_attr.autogroup.max_num_groups = 1;
4787         ft_attr.level = MLX5E_TC_MISS_LEVEL;
4788         ft_attr.prio = 0;
4789         ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL);
4790
4791         *ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
4792         if (IS_ERR(*ft)) {
4793                 err = PTR_ERR(*ft);
4794                 netdev_err(priv->netdev, "failed to create tc nic miss table err=%d\n", err);
4795         }
4796
4797         return err;
4798 }
4799
4800 static void mlx5e_tc_nic_destroy_miss_table(struct mlx5e_priv *priv)
4801 {
4802         mlx5_destroy_flow_table(priv->fs.tc.miss_t);
4803 }
4804
4805 int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
4806 {
4807         struct mlx5e_tc_table *tc = &priv->fs.tc;
4808         struct mlx5_core_dev *dev = priv->mdev;
4809         struct mapping_ctx *chains_mapping;
4810         struct mlx5_chains_attr attr = {};
4811         u64 mapping_id;
4812         int err;
4813
4814         mlx5e_mod_hdr_tbl_init(&tc->mod_hdr);
4815         mutex_init(&tc->t_lock);
4816         mutex_init(&tc->hairpin_tbl_lock);
4817         hash_init(tc->hairpin_tbl);
4818
4819         err = rhashtable_init(&tc->ht, &tc_ht_params);
4820         if (err)
4821                 return err;
4822
4823         lockdep_set_class(&tc->ht.mutex, &tc_ht_lock_key);
4824
4825         mapping_id = mlx5_query_nic_system_image_guid(dev);
4826
4827         chains_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN,
4828                                                sizeof(struct mlx5_mapped_obj),
4829                                                MLX5E_TC_TABLE_CHAIN_TAG_MASK, true);
4830
4831         if (IS_ERR(chains_mapping)) {
4832                 err = PTR_ERR(chains_mapping);
4833                 goto err_mapping;
4834         }
4835         tc->mapping = chains_mapping;
4836
4837         err = mlx5e_tc_nic_create_miss_table(priv);
4838         if (err)
4839                 goto err_chains;
4840
4841         if (MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
4842                 attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED |
4843                         MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
4844         attr.ns = MLX5_FLOW_NAMESPACE_KERNEL;
4845         attr.max_ft_sz = mlx5e_tc_nic_get_ft_size(dev);
4846         attr.max_grp_num = MLX5E_TC_TABLE_NUM_GROUPS;
4847         attr.default_ft = priv->fs.tc.miss_t;
4848         attr.mapping = chains_mapping;
4849
4850         tc->chains = mlx5_chains_create(dev, &attr);
4851         if (IS_ERR(tc->chains)) {
4852                 err = PTR_ERR(tc->chains);
4853                 goto err_miss;
4854         }
4855
4856         tc->post_act = mlx5e_tc_post_act_init(priv, tc->chains, MLX5_FLOW_NAMESPACE_KERNEL);
4857         tc->ct = mlx5_tc_ct_init(priv, tc->chains, &priv->fs.tc.mod_hdr,
4858                                  MLX5_FLOW_NAMESPACE_KERNEL, tc->post_act);
4859
4860         tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
4861         err = register_netdevice_notifier_dev_net(priv->netdev,
4862                                                   &tc->netdevice_nb,
4863                                                   &tc->netdevice_nn);
4864         if (err) {
4865                 tc->netdevice_nb.notifier_call = NULL;
4866                 mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n");
4867                 goto err_reg;
4868         }
4869
4870         return 0;
4871
4872 err_reg:
4873         mlx5_tc_ct_clean(tc->ct);
4874         mlx5e_tc_post_act_destroy(tc->post_act);
4875         mlx5_chains_destroy(tc->chains);
4876 err_miss:
4877         mlx5e_tc_nic_destroy_miss_table(priv);
4878 err_chains:
4879         mapping_destroy(chains_mapping);
4880 err_mapping:
4881         rhashtable_destroy(&tc->ht);
4882         return err;
4883 }
4884
4885 static void _mlx5e_tc_del_flow(void *ptr, void *arg)
4886 {
4887         struct mlx5e_tc_flow *flow = ptr;
4888         struct mlx5e_priv *priv = flow->priv;
4889
4890         mlx5e_tc_del_flow(priv, flow);
4891         kfree(flow);
4892 }
4893
4894 void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
4895 {
4896         struct mlx5e_tc_table *tc = &priv->fs.tc;
4897
4898         if (tc->netdevice_nb.notifier_call)
4899                 unregister_netdevice_notifier_dev_net(priv->netdev,
4900                                                       &tc->netdevice_nb,
4901                                                       &tc->netdevice_nn);
4902
4903         mlx5e_mod_hdr_tbl_destroy(&tc->mod_hdr);
4904         mutex_destroy(&tc->hairpin_tbl_lock);
4905
4906         rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL);
4907
4908         if (!IS_ERR_OR_NULL(tc->t)) {
4909                 mlx5_chains_put_table(tc->chains, 0, 1, MLX5E_TC_FT_LEVEL);
4910                 tc->t = NULL;
4911         }
4912         mutex_destroy(&tc->t_lock);
4913
4914         mlx5_tc_ct_clean(tc->ct);
4915         mlx5e_tc_post_act_destroy(tc->post_act);
4916         mapping_destroy(tc->mapping);
4917         mlx5_chains_destroy(tc->chains);
4918         mlx5e_tc_nic_destroy_miss_table(priv);
4919 }
4920
4921 int mlx5e_tc_ht_init(struct rhashtable *tc_ht)
4922 {
4923         int err;
4924
4925         err = rhashtable_init(tc_ht, &tc_ht_params);
4926         if (err)
4927                 return err;
4928
4929         lockdep_set_class(&tc_ht->mutex, &tc_ht_lock_key);
4930
4931         return 0;
4932 }
4933
4934 void mlx5e_tc_ht_cleanup(struct rhashtable *tc_ht)
4935 {
4936         rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
4937 }
4938
4939 int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv)
4940 {
4941         const size_t sz_enc_opts = sizeof(struct tunnel_match_enc_opts);
4942         struct mlx5e_rep_priv *rpriv;
4943         struct mapping_ctx *mapping;
4944         struct mlx5_eswitch *esw;
4945         struct mlx5e_priv *priv;
4946         u64 mapping_id;
4947         int err = 0;
4948
4949         rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
4950         priv = netdev_priv(rpriv->netdev);
4951         esw = priv->mdev->priv.eswitch;
4952
4953         uplink_priv->post_act = mlx5e_tc_post_act_init(priv, esw_chains(esw),
4954                                                        MLX5_FLOW_NAMESPACE_FDB);
4955         uplink_priv->ct_priv = mlx5_tc_ct_init(netdev_priv(priv->netdev),
4956                                                esw_chains(esw),
4957                                                &esw->offloads.mod_hdr,
4958                                                MLX5_FLOW_NAMESPACE_FDB,
4959                                                uplink_priv->post_act);
4960
4961         uplink_priv->int_port_priv = mlx5e_tc_int_port_init(netdev_priv(priv->netdev));
4962
4963         uplink_priv->tc_psample = mlx5e_tc_sample_init(esw, uplink_priv->post_act);
4964
4965         mapping_id = mlx5_query_nic_system_image_guid(esw->dev);
4966
4967         mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL,
4968                                         sizeof(struct tunnel_match_key),
4969                                         TUNNEL_INFO_BITS_MASK, true);
4970
4971         if (IS_ERR(mapping)) {
4972                 err = PTR_ERR(mapping);
4973                 goto err_tun_mapping;
4974         }
4975         uplink_priv->tunnel_mapping = mapping;
4976
4977         /* The last two values are reserved for the stack devices' slow path
4978          * table mark and the bridge ingress push mark.
4979          */
4980         mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL_ENC_OPTS,
4981                                         sz_enc_opts, ENC_OPTS_BITS_MASK - 2, true);
4982         if (IS_ERR(mapping)) {
4983                 err = PTR_ERR(mapping);
4984                 goto err_enc_opts_mapping;
4985         }
4986         uplink_priv->tunnel_enc_opts_mapping = mapping;
4987
4988         uplink_priv->encap = mlx5e_tc_tun_init(priv);
4989         if (IS_ERR(uplink_priv->encap)) {
4990                 err = PTR_ERR(uplink_priv->encap);
4991                 goto err_register_fib_notifier;
4992         }
4993
4994         return 0;
4995
4996 err_register_fib_notifier:
4997         mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
4998 err_enc_opts_mapping:
4999         mapping_destroy(uplink_priv->tunnel_mapping);
5000 err_tun_mapping:
5001         mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
5002         mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv);
5003         mlx5_tc_ct_clean(uplink_priv->ct_priv);
5004         netdev_warn(priv->netdev,
5005                     "Failed to initialize tc (eswitch), err: %d", err);
5006         mlx5e_tc_post_act_destroy(uplink_priv->post_act);
5007         return err;
5008 }
5009
5010 void mlx5e_tc_esw_cleanup(struct mlx5_rep_uplink_priv *uplink_priv)
5011 {
5012         mlx5e_tc_tun_cleanup(uplink_priv->encap);
5013
5014         mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
5015         mapping_destroy(uplink_priv->tunnel_mapping);
5016
5017         mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
5018         mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv);
5019         mlx5_tc_ct_clean(uplink_priv->ct_priv);
5020         mlx5e_flow_meters_cleanup(uplink_priv->flow_meters);
5021         mlx5e_tc_post_act_destroy(uplink_priv->post_act);
5022 }
5023
5024 int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
5025 {
5026         struct rhashtable *tc_ht = get_tc_ht(priv, flags);
5027
5028         return atomic_read(&tc_ht->nelems);
5029 }
5030
5031 void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
5032 {
5033         struct mlx5e_tc_flow *flow, *tmp;
5034
5035         list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer)
5036                 __mlx5e_tc_del_fdb_peer_flow(flow);
5037 }
5038
5039 void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
5040 {
5041         struct mlx5_rep_uplink_priv *rpriv =
5042                 container_of(work, struct mlx5_rep_uplink_priv,
5043                              reoffload_flows_work);
5044         struct mlx5e_tc_flow *flow, *tmp;
5045
5046         mutex_lock(&rpriv->unready_flows_lock);
5047         list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) {
5048                 if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL))
5049                         unready_flow_del(flow);
5050         }
5051         mutex_unlock(&rpriv->unready_flows_lock);
5052 }
5053
5054 static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
5055                                      struct flow_cls_offload *cls_flower,
5056                                      unsigned long flags)
5057 {
5058         switch (cls_flower->command) {
5059         case FLOW_CLS_REPLACE:
5060                 return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
5061                                               flags);
5062         case FLOW_CLS_DESTROY:
5063                 return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
5064                                            flags);
5065         case FLOW_CLS_STATS:
5066                 return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
5067                                           flags);
5068         default:
5069                 return -EOPNOTSUPP;
5070         }
5071 }
5072
5073 int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
5074                             void *cb_priv)
5075 {
5076         unsigned long flags = MLX5_TC_FLAG(INGRESS);
5077         struct mlx5e_priv *priv = cb_priv;
5078
5079         if (!priv->netdev || !netif_device_present(priv->netdev))
5080                 return -EOPNOTSUPP;
5081
5082         if (mlx5e_is_uplink_rep(priv))
5083                 flags |= MLX5_TC_FLAG(ESW_OFFLOAD);
5084         else
5085                 flags |= MLX5_TC_FLAG(NIC_OFFLOAD);
5086
5087         switch (type) {
5088         case TC_SETUP_CLSFLOWER:
5089                 return mlx5e_setup_tc_cls_flower(priv, type_data, flags);
5090         default:
5091                 return -EOPNOTSUPP;
5092         }
5093 }
5094
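/* On a NIC mode miss, recover tc state from the CQE: map the chain tag
 * found in reg_b back to a chain, store it in the tc skb extension and
 * restore the CT zone, so software tc can resume processing.
 */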
5095 bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe,
5096                          struct sk_buff *skb)
5097 {
5098 #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
5099         u32 chain = 0, chain_tag, reg_b, zone_restore_id;
5100         struct mlx5e_priv *priv = netdev_priv(skb->dev);
5101         struct mlx5e_tc_table *tc = &priv->fs.tc;
5102         struct mlx5_mapped_obj mapped_obj;
5103         struct tc_skb_ext *tc_skb_ext;
5104         int err;
5105
5106         reg_b = be32_to_cpu(cqe->ft_metadata);
5107
5108         chain_tag = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK;
5109
5110         err = mapping_find(tc->mapping, chain_tag, &mapped_obj);
5111         if (err) {
5112                 netdev_dbg(priv->netdev,
5113                            "Couldn't find chain for chain tag: %d, err: %d\n",
5114                            chain_tag, err);
5115                 return false;
5116         }
5117
5118         if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) {
5119                 chain = mapped_obj.chain;
5120                 tc_skb_ext = tc_skb_ext_alloc(skb);
5121                 if (WARN_ON(!tc_skb_ext))
5122                         return false;
5123
5124                 tc_skb_ext->chain = chain;
5125
5126                 zone_restore_id = (reg_b >> MLX5_REG_MAPPING_MOFFSET(NIC_ZONE_RESTORE_TO_REG)) &
5127                         ESW_ZONE_ID_MASK;
5128
5129                 if (!mlx5e_tc_ct_restore_flow(tc->ct, skb,
5130                                               zone_restore_id))
5131                         return false;
5132         } else {
5133                 netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type);
5134                 return false;
5135         }
5136 #endif /* CONFIG_NET_TC_SKB_EXT */
5137
5138         return true;
5139 }